LLVM API Documentation

PPCISelLowering.cpp
1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PPCISelLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPCISelLowering.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCPerfectShuffle.h"
18 #include "PPCTargetMachine.h"
19 #include "PPCTargetObjectFile.h"
20 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/IR/CallingConv.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/Intrinsics.h"
38 using namespace llvm;
39 
40 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
41 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
42 
43 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
44 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
45 
46 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
47 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
48 
49 static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
50  if (TM.getSubtargetImpl()->isDarwin())
51  return new TargetLoweringObjectFileMachO();
52 
53  if (TM.getSubtargetImpl()->isSVR4ABI())
54  return new PPC64LinuxTargetObjectFile();
55 
56  return new TargetLoweringObjectFileELF();
57 }
58 
59 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
60  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
61  const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
62 
64 
65  // Use _setjmp/_longjmp instead of setjmp/longjmp.
68 
69  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
70  // arguments are at least 4/8 bytes aligned.
71  bool isPPC64 = Subtarget->isPPC64();
72  setMinStackArgumentAlignment(isPPC64 ? 8:4);
73 
74  // Set up the register classes.
75  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
76  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
77  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
78 
79  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
82 
84 
85  // PowerPC has pre-inc loads and stores.
96 
97  // This is used in the ppcf128->int sequence. Note it has different semantics
98  // from FP_ROUND: that rounds to nearest, this rounds to zero.
100 
101  // We do not currently implement these libm ops for PowerPC.
108 
109  // PowerPC has no SREM/UREM instructions
114 
115  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
124 
125  // We don't support sin/cos/sqrt/fmod/pow
138 
140 
141  // If we're enabling GP optimizations, use hardware square root
142  if (!Subtarget->hasFSQRT() &&
143  !(TM.Options.UnsafeFPMath &&
144  Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
146 
147  if (!Subtarget->hasFSQRT() &&
148  !(TM.Options.UnsafeFPMath &&
149  Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
151 
152  if (Subtarget->hasFCPSGN()) {
155  } else {
158  }
159 
160  if (Subtarget->hasFPRND()) {
165 
170  }
171 
172  // PowerPC does not have BSWAP, CTPOP or CTTZ
181 
182  if (Subtarget->hasPOPCNTD()) {
185  } else {
188  }
189 
190  // PowerPC does not have ROTR
193 
194  // PowerPC does not have Select
199 
200  // PowerPC wants to turn select_cc of FP into fsel when possible.
203 
204  // PowerPC wants to optimize integer setcc a bit
206 
207  // PowerPC does not have BRCOND which requires SetCC
209 
211 
212  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
214 
215  // PowerPC does not have [U|S]INT_TO_FP
218 
223 
224  // We cannot sextinreg(i1). Expand to shifts.
226 
227  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
228  // SjLj exception handling, but is a light-weight setjmp/longjmp replacement
229  // to support continuations, user-level threading, etc. As a result, no
230  // other SjLj exception interfaces are implemented; please don't build
231  // your own exception handling based on them.
232  // LLVM/Clang supports zero-cost DWARF exception handling.
235 
236  // We want to legalize GlobalAddress and ConstantPool nodes into the
237  // appropriate instructions to materialize the address.
248 
249  // TRAP is legal.
251 
252  // TRAMPOLINE is custom lowered.
255 
256  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
258 
259  if (Subtarget->isSVR4ABI()) {
260  if (isPPC64) {
261  // VAARG always uses double-word chunks, so promote anything smaller.
271  } else {
272  // VAARG is custom lowered with the 32-bit SVR4 ABI.
275  }
276  } else
278 
279  if (Subtarget->isSVR4ABI() && !isPPC64)
280  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
282  else
284 
285  // Use the default implementation.
291 
292  // We want to custom lower some of our intrinsics.
294 
295  // To handle counter-based loop conditions.
297 
298  // Comparisons that require checking two conditions.
311 
312  if (Subtarget->has64BitSupport()) {
313  // They also have instructions for converting between i64 and fp.
318  // This is just the low 32 bits of a (signed) fp->i64 conversion.
319  // We cannot do this with Promote because i64 is not a legal type.
321 
322  if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
324  } else {
325  // PowerPC does not have FP_TO_UINT on 32-bit implementations.
327  }
328 
329  // With the instructions enabled under FPCVT, we can do everything.
330  if (PPCSubTarget.hasFPCVT()) {
331  if (Subtarget->has64BitSupport()) {
336  }
337 
342  }
343 
344  if (Subtarget->use64BitRegs()) {
345  // 64-bit PowerPC implementations can support i64 types directly
346  addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
347  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
349  // 64-bit PowerPC wants to expand i128 shifts itself.
353  } else {
354  // 32-bit PowerPC wants to expand i64 shifts itself.
358  }
359 
360  if (Subtarget->hasAltivec()) {
361  // First set operation action for all vector types to expand. Then we
362  // will selectively turn on ones that can be effectively codegen'd.
363  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
366 
367  // add/sub are legal for all supported vector VT's.
370 
371  // We promote all shuffles to v16i8.
374 
375  // We promote all non-typed operations to v4i32.
388 
389  // No other operations are legal.
429 
430  for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
433  setTruncStoreAction(VT, InnerVT, Expand);
434  }
438  }
439 
440  // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
441  // with merges, splats, etc.
443 
458 
459  addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
460  addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
461  addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
462  addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
463 
466 
467  if (TM.Options.UnsafeFPMath) {
470  }
471 
475 
478 
483 
484  // Altivec does not contain unordered floating-point compare instructions
491 
494  }
495 
496  if (Subtarget->has64BitSupport()) {
499  }
500 
505 
507  // Altivec instructions set fields to all zeros or all ones.
509 
510  if (isPPC64) {
514  } else {
518  }
519 
520  // We have target-specific dag combine patterns for the following nodes:
527 
528  // Use reciprocal estimates.
529  if (TM.Options.UnsafeFPMath) {
532  }
533 
534  // Darwin long double math library functions have $LDBL128 appended.
535  if (Subtarget->isDarwin()) {
536  setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
537  setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
538  setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
539  setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
540  setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
541  setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
542  setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
543  setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
544  setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
545  setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
546  }
547 
549  if (PPCSubTarget.isDarwin())
551 
552  if (isPPC64 && Subtarget->isJITCodeModel())
553  // Temporary workaround for the inability of PPC64 JIT to handle jump
554  // tables.
555  setSupportJumpTables(false);
556 
558 
559  if (Subtarget->enableMachineScheduler())
561  else
563 
565 
566  // The Freescale cores do better with aggressive inlining of memcpy and
567  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
568  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
569  Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
570  MaxStoresPerMemset = 32;
572  MaxStoresPerMemcpy = 32;
574  MaxStoresPerMemmove = 32;
576 
578  }
579 }
580 
581 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
582 /// the desired ByVal argument alignment.
583 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
584  unsigned MaxMaxAlign) {
585  if (MaxAlign == MaxMaxAlign)
586  return;
587  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
588  if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
589  MaxAlign = 32;
590  else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
591  MaxAlign = 16;
592  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
593  unsigned EltAlign = 0;
594  getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
595  if (EltAlign > MaxAlign)
596  MaxAlign = EltAlign;
597  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
598  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
599  unsigned EltAlign = 0;
600  getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
601  if (EltAlign > MaxAlign)
602  MaxAlign = EltAlign;
603  if (MaxAlign == MaxMaxAlign)
604  break;
605  }
606  }
607 }
608 
609 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
610 /// function arguments in the caller parameter area.
611 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
612  // Darwin passes everything on a 4 byte boundary.
613  if (PPCSubTarget.isDarwin())
614  return 4;
615 
616  // 16byte and wider vectors are passed on 16byte boundary.
617  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
618  unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
619  if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
620  getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
621  return Align;
622 }
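// For example, on a non-Darwin PPC64 target with Altivec, a by-value struct
// containing a <4 x i32> member is reported as 16-byte aligned (the vector is
// 128 bits wide), while a struct of plain scalars keeps the default of 8
// bytes (4 bytes on PPC32).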
623 
624 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
625  switch (Opcode) {
626  default: return 0;
627  case PPCISD::FSEL: return "PPCISD::FSEL";
628  case PPCISD::FCFID: return "PPCISD::FCFID";
629  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
630  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
631  case PPCISD::FRE: return "PPCISD::FRE";
632  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
633  case PPCISD::STFIWX: return "PPCISD::STFIWX";
634  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
635  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
636  case PPCISD::VPERM: return "PPCISD::VPERM";
637  case PPCISD::Hi: return "PPCISD::Hi";
638  case PPCISD::Lo: return "PPCISD::Lo";
639  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
640  case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
641  case PPCISD::LOAD: return "PPCISD::LOAD";
642  case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
643  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
644  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
645  case PPCISD::SRL: return "PPCISD::SRL";
646  case PPCISD::SRA: return "PPCISD::SRA";
647  case PPCISD::SHL: return "PPCISD::SHL";
648  case PPCISD::CALL: return "PPCISD::CALL";
649  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
650  case PPCISD::MTCTR: return "PPCISD::MTCTR";
651  case PPCISD::BCTRL: return "PPCISD::BCTRL";
652  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
653  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
654  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
655  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
656  case PPCISD::VCMP: return "PPCISD::VCMP";
657  case PPCISD::VCMPo: return "PPCISD::VCMPo";
658  case PPCISD::LBRX: return "PPCISD::LBRX";
659  case PPCISD::STBRX: return "PPCISD::STBRX";
660  case PPCISD::LARX: return "PPCISD::LARX";
661  case PPCISD::STCX: return "PPCISD::STCX";
662  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
663  case PPCISD::BDNZ: return "PPCISD::BDNZ";
664  case PPCISD::BDZ: return "PPCISD::BDZ";
665  case PPCISD::MFFS: return "PPCISD::MFFS";
666  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
667  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
668  case PPCISD::CR6SET: return "PPCISD::CR6SET";
669  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
670  case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
671  case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
672  case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
673  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
674  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
675  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
676  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
677  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
678  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
679  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
680  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
681  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
682  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
683  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
684  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
685  case PPCISD::SC: return "PPCISD::SC";
686  }
687 }
688 
689 EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
690  if (!VT.isVector())
691  return MVT::i32;
692  return VT.changeVectorElementTypeToInteger();
693 }
694 
695 //===----------------------------------------------------------------------===//
696 // Node matching predicates, for use by the tblgen matching code.
697 //===----------------------------------------------------------------------===//
698 
699 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
700 static bool isFloatingPointZero(SDValue Op) {
701  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
702  return CFP->getValueAPF().isZero();
703  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
704  // Maybe this has already been legalized into the constant pool?
705  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
706  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
707  return CFP->getValueAPF().isZero();
708  }
709  return false;
710 }
711 
712 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
713 /// true if Op is undef or if it matches the specified value.
714 static bool isConstantOrUndef(int Op, int Val) {
715  return Op < 0 || Op == Val;
716 }
717 
718 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
719 /// VPKUHUM instruction.
720 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
721  if (!isUnary) {
722  for (unsigned i = 0; i != 16; ++i)
723  if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
724  return false;
725  } else {
726  for (unsigned i = 0; i != 8; ++i)
727  if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
728  !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
729  return false;
730  }
731  return true;
732 }
733 
734 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
735 /// VPKUWUM instruction.
736 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
737  if (!isUnary) {
738  for (unsigned i = 0; i != 16; i += 2)
739  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
740  !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
741  return false;
742  } else {
743  for (unsigned i = 0; i != 8; i += 2)
744  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
745  !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
746  !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
747  !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
748  return false;
749  }
750  return true;
751 }
752 
753 /// isVMerge - Common function, used to match vmrg* shuffles.
754 ///
755 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
756  unsigned LHSStart, unsigned RHSStart) {
757  assert(N->getValueType(0) == MVT::v16i8 &&
758  "PPC only supports shuffles by bytes!");
759  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
760  "Unsupported merge size!");
761 
762  for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
763  for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
764  if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
765  LHSStart+j+i*UnitSize) ||
766  !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
767  RHSStart+j+i*UnitSize))
768  return false;
769  }
770  return true;
771 }
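// For example, isVMRGLShuffleMask(N, 1, false) below goes through isVMerge
// with LHSStart == 8 and RHSStart == 24, so it accepts exactly the mask
//   <8,24, 9,25, 10,26, 11,27, 12,28, 13,29, 14,30, 15,31>
// i.e. the byte merge-low pattern produced by vmrglb.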
772 
773 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
774 /// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
775 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
776  bool isUnary) {
777  if (!isUnary)
778  return isVMerge(N, UnitSize, 8, 24);
779  return isVMerge(N, UnitSize, 8, 8);
780 }
781 
782 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
783 /// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
784 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
785  bool isUnary) {
786  if (!isUnary)
787  return isVMerge(N, UnitSize, 0, 16);
788  return isVMerge(N, UnitSize, 0, 0);
789 }
790 
791 
792 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
793 /// amount, otherwise return -1.
794 int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
795  assert(N->getValueType(0) == MVT::v16i8 &&
796  "PPC only supports shuffles by bytes!");
797 
798  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
799 
800  // Find the first non-undef value in the shuffle mask.
801  unsigned i;
802  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
803  /*search*/;
804 
805  if (i == 16) return -1; // all undef.
806 
807  // Otherwise, check to see if the rest of the elements are consecutively
808  // numbered from this value.
809  unsigned ShiftAmt = SVOp->getMaskElt(i);
810  if (ShiftAmt < i) return -1;
811  ShiftAmt -= i;
812 
813  if (!isUnary) {
814  // Check the rest of the elements to see if they are consecutive.
815  for (++i; i != 16; ++i)
816  if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
817  return -1;
818  } else {
819  // Check the rest of the elements to see if they are consecutive.
820  for (++i; i != 16; ++i)
821  if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
822  return -1;
823  }
824  return ShiftAmt;
825 }
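// For example, the v16i8 mask <3,4,5,...,17,18> starts at element 3 and is
// consecutive, so this returns a shift amount of 3 (a vsldoi by 3 bytes).
// In the unary case the indices are checked modulo 16 instead.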
826 
827 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
828 /// specifies a splat of a single element that is suitable for input to
829 /// VSPLTB/VSPLTH/VSPLTW.
830 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
831  assert(N->getValueType(0) == MVT::v16i8 &&
832  (EltSize == 1 || EltSize == 2 || EltSize == 4));
833 
834  // This is a splat operation if each element of the permute is the same, and
835  // if the value doesn't reference the second vector.
836  unsigned ElementBase = N->getMaskElt(0);
837 
838  // FIXME: Handle UNDEF elements too!
839  if (ElementBase >= 16)
840  return false;
841 
842  // Check that the indices are consecutive, in the case of a multi-byte element
843  // splatted with a v16i8 mask.
844  for (unsigned i = 1; i != EltSize; ++i)
845  if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
846  return false;
847 
848  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
849  if (N->getMaskElt(i) < 0) continue;
850  for (unsigned j = 0; j != EltSize; ++j)
851  if (N->getMaskElt(i+j) != N->getMaskElt(j))
852  return false;
853  }
854  return true;
855 }
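// For example, with EltSize == 4 the mask
//   <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>
// is a splat of word element 2; getVSPLTImmediate below then returns
// 8 / 4 == 2, the immediate for a vspltw.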
856 
857 /// isAllNegativeZeroVector - Returns true if all elements of build_vector
858 /// are -0.0.
859 bool PPC::isAllNegativeZeroVector(SDNode *N) {
860  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
861 
862  APInt APVal, APUndef;
863  unsigned BitSize;
864  bool HasAnyUndefs;
865 
866  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
867  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
868  return CFP->getValueAPF().isNegZero();
869 
870  return false;
871 }
872 
873 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
874 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
875 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
876  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
877  assert(isSplatShuffleMask(SVOp, EltSize));
878  return SVOp->getMaskElt(0) / EltSize;
879 }
880 
881 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
882 /// by using a vspltis[bhw] instruction of the specified element size, return
883 /// the constant being splatted. The ByteSize field indicates the number of
884 /// bytes of each element [124] -> [bhw].
885 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
886  SDValue OpVal(0, 0);
887 
888  // If ByteSize of the splat is bigger than the element size of the
889  // build_vector, then we have a case where we are checking for a splat where
890  // multiple elements of the buildvector are folded together into a single
891  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
892  unsigned EltSize = 16/N->getNumOperands();
893  if (EltSize < ByteSize) {
894  unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
895  SDValue UniquedVals[4];
896  assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
897 
898  // See if all of the elements in the buildvector agree across.
899  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
900  if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
901  // If the element isn't a constant, bail fully out.
902  if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
903 
904 
905  if (UniquedVals[i&(Multiple-1)].getNode() == 0)
906  UniquedVals[i&(Multiple-1)] = N->getOperand(i);
907  else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
908  return SDValue(); // no match.
909  }
910 
911  // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
912  // either constant or undef values that are identical for each chunk. See
913  // if these chunks can form into a larger vspltis*.
914 
915  // Check to see if all of the leading entries are either 0 or -1. If
916  // neither, then this won't fit into the immediate field.
917  bool LeadingZero = true;
918  bool LeadingOnes = true;
919  for (unsigned i = 0; i != Multiple-1; ++i) {
920  if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
921 
922  LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
923  LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
924  }
925  // Finally, check the least significant entry.
926  if (LeadingZero) {
927  if (UniquedVals[Multiple-1].getNode() == 0)
928  return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
929  int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
930  if (Val < 16)
931  return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
932  }
933  if (LeadingOnes) {
934  if (UniquedVals[Multiple-1].getNode() == 0)
935  return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
936  int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
937  if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
938  return DAG.getTargetConstant(Val, MVT::i32);
939  }
940 
941  return SDValue();
942  }
943 
944  // Check to see if this buildvec has a single non-undef value in its elements.
945  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
946  if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
947  if (OpVal.getNode() == 0)
948  OpVal = N->getOperand(i);
949  else if (OpVal != N->getOperand(i))
950  return SDValue();
951  }
952 
953  if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
954 
955  unsigned ValSizeInBytes = EltSize;
956  uint64_t Value = 0;
957  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
958  Value = CN->getZExtValue();
959  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
960  assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
961  Value = FloatToBits(CN->getValueAPF().convertToFloat());
962  }
963 
964  // If the splat value is larger than the element value, then we can never do
965  // this splat. The only case that we could fit the replicated bits into our
966  // immediate field for would be zero, and we prefer to use vxor for it.
967  if (ValSizeInBytes < ByteSize) return SDValue();
968 
969  // If the element value is larger than the splat value, cut it in half and
970  // check to see if the two halves are equal. Continue doing this until we
971  // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
972  while (ValSizeInBytes > ByteSize) {
973  ValSizeInBytes >>= 1;
974 
975  // If the top half equals the bottom half, we're still ok.
976  if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
977  (Value & ((1 << (8*ValSizeInBytes))-1)))
978  return SDValue();
979  }
980 
981  // Properly sign extend the value.
982  int MaskVal = SignExtend32(Value, ByteSize * 8);
983 
984  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
985  if (MaskVal == 0) return SDValue();
986 
987  // Finally, if this value fits in a 5 bit sext field, return it
988  if (SignExtend32<5>(MaskVal) == MaskVal)
989  return DAG.getTargetConstant(MaskVal, MVT::i32);
990  return SDValue();
991 }
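// For example, a v4i32 build_vector splatting 0x01010101, queried with
// ByteSize == 1: the halving loop above reduces 0x01010101 -> 0x0101 -> 0x01,
// which sign-extends to 1 and fits in the 5-bit field, so this returns the
// constant 1 and the splat can be materialized as vspltisb 1.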
992 
993 //===----------------------------------------------------------------------===//
994 // Addressing Mode Selection
995 //===----------------------------------------------------------------------===//
996 
997 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
998 /// or 64-bit immediate, and if the value can be accurately represented as a
999 /// sign extension from a 16-bit value. If so, this returns true and the
1000 /// immediate.
1001 static bool isIntS16Immediate(SDNode *N, short &Imm) {
1002  if (N->getOpcode() != ISD::Constant)
1003  return false;
1004 
1005  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
1006  if (N->getValueType(0) == MVT::i32)
1007  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
1008  else
1009  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
1010 }
1011 static bool isIntS16Immediate(SDValue Op, short &Imm) {
1012  return isIntS16Immediate(Op.getNode(), Imm);
1013 }
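// For example, an i32 constant 0xFFFF8000 gives Imm == -32768, which matches
// the full value when sign-extended, so this returns true; 0x12345 truncates
// to 0x2345 and does not match, so it returns false.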
1014 
1015 
1016 /// SelectAddressRegReg - Given the specified address, check to see if it
1017 /// can be represented as an indexed [r+r] operation. Returns false if it
1018 /// can be more efficiently represented with [r+imm].
1019 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1020  SDValue &Index,
1021  SelectionDAG &DAG) const {
1022  short imm = 0;
1023  if (N.getOpcode() == ISD::ADD) {
1024  if (isIntS16Immediate(N.getOperand(1), imm))
1025  return false; // r+i
1026  if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1027  return false; // r+i
1028 
1029  Base = N.getOperand(0);
1030  Index = N.getOperand(1);
1031  return true;
1032  } else if (N.getOpcode() == ISD::OR) {
1033  if (isIntS16Immediate(N.getOperand(1), imm))
1034  return false; // r+i can fold it if we can.
1035 
1036  // If this is an or of disjoint bitfields, we can codegen this as an add
1037  // (for better address arithmetic) if the LHS and RHS of the OR are provably
1038  // disjoint.
1039  APInt LHSKnownZero, LHSKnownOne;
1040  APInt RHSKnownZero, RHSKnownOne;
1041  DAG.ComputeMaskedBits(N.getOperand(0),
1042  LHSKnownZero, LHSKnownOne);
1043 
1044  if (LHSKnownZero.getBoolValue()) {
1045  DAG.ComputeMaskedBits(N.getOperand(1),
1046  RHSKnownZero, RHSKnownOne);
1047  // If all of the bits are known zero on the LHS or RHS, the add won't
1048  // carry.
1049  if (~(LHSKnownZero | RHSKnownZero) == 0) {
1050  Base = N.getOperand(0);
1051  Index = N.getOperand(1);
1052  return true;
1053  }
1054  }
1055  }
1056 
1057  return false;
1058 }
1059 
1060 // If we happen to be doing an i64 load or store into a stack slot that has
1061 // less than a 4-byte alignment, then the frame-index elimination may need to
1062 // use an indexed load or store instruction (because the offset may not be a
1063 // multiple of 4). The extra register needed to hold the offset comes from the
1064 // register scavenger, and it is possible that the scavenger will need to use
1065 // an emergency spill slot. As a result, we need to make sure that a spill slot
1066 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1067 // stack slot.
1068 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1069  // FIXME: This does not handle the LWA case.
1070  if (VT != MVT::i64)
1071  return;
1072 
1073  // NOTE: We'll exclude negative FIs here, which come from argument
1074  // lowering, because there are no known test cases triggering this problem
1075  // using packed structures (or similar). We can remove this exclusion if
1076  // we find such a test case. The reason why this is so test-case driven is
1077  // because this entire 'fixup' is only to prevent crashes (from the
1078  // register scavenger) on not-really-valid inputs. For example, if we have:
1079  // %a = alloca i1
1080  // %b = bitcast i1* %a to i64*
1081  // store i64 0, i64* %b
1082  // then the store should really be marked as 'align 1', but is not. If it
1083  // were marked as 'align 1' then the indexed form would have been
1084  // instruction-selected initially, and the problem this 'fixup' is preventing
1085  // won't happen regardless.
1086  if (FrameIdx < 0)
1087  return;
1088 
1089  MachineFunction &MF = DAG.getMachineFunction();
1090  MachineFrameInfo *MFI = MF.getFrameInfo();
1091 
1092  unsigned Align = MFI->getObjectAlignment(FrameIdx);
1093  if (Align >= 4)
1094  return;
1095 
1096  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1097  FuncInfo->setHasNonRISpills();
1098 }
1099 
1100 /// Returns true if the address N can be represented by a base register plus
1101 /// a signed 16-bit displacement [r+imm], and if it is not better
1102 /// represented as reg+reg. If Aligned is true, only accept displacements
1103 /// suitable for STD and friends, i.e. multiples of 4.
1104 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1105  SDValue &Base,
1106  SelectionDAG &DAG,
1107  bool Aligned) const {
1108  // FIXME dl should come from parent load or store, not from address
1109  SDLoc dl(N);
1110  // If this can be more profitably realized as r+r, fail.
1111  if (SelectAddressRegReg(N, Disp, Base, DAG))
1112  return false;
1113 
1114  if (N.getOpcode() == ISD::ADD) {
1115  short imm = 0;
1116  if (isIntS16Immediate(N.getOperand(1), imm) &&
1117  (!Aligned || (imm & 3) == 0)) {
1118  Disp = DAG.getTargetConstant(imm, N.getValueType());
1119  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1120  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1121  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1122  } else {
1123  Base = N.getOperand(0);
1124  }
1125  return true; // [r+i]
1126  } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
1127  // Match LOAD (ADD (X, Lo(G))).
1128  assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
1129  && "Cannot handle constant offsets yet!");
1130  Disp = N.getOperand(1).getOperand(0); // The global address.
1131  assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
1132  Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
1133  Disp.getOpcode() == ISD::TargetConstantPool ||
1134  Disp.getOpcode() == ISD::TargetJumpTable);
1135  Base = N.getOperand(0);
1136  return true; // [&g+r]
1137  }
1138  } else if (N.getOpcode() == ISD::OR) {
1139  short imm = 0;
1140  if (isIntS16Immediate(N.getOperand(1), imm) &&
1141  (!Aligned || (imm & 3) == 0)) {
1142  // If this is an or of disjoint bitfields, we can codegen this as an add
1143  // (for better address arithmetic) if the LHS and RHS of the OR are
1144  // provably disjoint.
1145  APInt LHSKnownZero, LHSKnownOne;
1146  DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
1147 
1148  if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
1149  // If all of the bits are known zero on the LHS or RHS, the add won't
1150  // carry.
1151  Base = N.getOperand(0);
1152  Disp = DAG.getTargetConstant(imm, N.getValueType());
1153  return true;
1154  }
1155  }
1156  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1157  // Loading from a constant address.
1158 
1159  // If this address fits entirely in a 16-bit sext immediate field, codegen
1160  // this as "d, 0"
1161  short Imm;
1162  if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
1163  Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
1164  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1165  CN->getValueType(0));
1166  return true;
1167  }
1168 
1169  // Handle 32-bit sext immediates with LIS + addr mode.
1170  if ((CN->getValueType(0) == MVT::i32 ||
1171  (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
1172  (!Aligned || (CN->getZExtValue() & 3) == 0)) {
1173  int Addr = (int)CN->getZExtValue();
1174 
1175  // Otherwise, break this down into an LIS + disp.
1176  Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
1177 
1178  Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
1179  unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1180  Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1181  return true;
1182  }
1183  }
1184 
1185  Disp = DAG.getTargetConstant(0, getPointerTy());
1186  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1187  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1188  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1189  } else
1190  Base = N;
1191  return true; // [r+0]
1192 }
1193 
1194 /// SelectAddressRegRegOnly - Given the specified address, force it to be
1195 /// represented as an indexed [r+r] operation.
1196 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1197  SDValue &Index,
1198  SelectionDAG &DAG) const {
1199  // Check to see if we can easily represent this as an [r+r] address. This
1200  // will fail if it thinks that the address is more profitably represented as
1201  // reg+imm, e.g. where imm = 0.
1202  if (SelectAddressRegReg(N, Base, Index, DAG))
1203  return true;
1204 
1205  // If the operand is an addition, always emit this as [r+r], since this is
1206  // better (for code size, and execution, as the memop does the add for free)
1207  // than emitting an explicit add.
1208  if (N.getOpcode() == ISD::ADD) {
1209  Base = N.getOperand(0);
1210  Index = N.getOperand(1);
1211  return true;
1212  }
1213 
1214  // Otherwise, do it the hard way, using R0 as the base register.
1215  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1216  N.getValueType());
1217  Index = N;
1218  return true;
1219 }
1220 
1221 /// getPreIndexedAddressParts - returns true by value, base pointer and
1222 /// offset pointer and addressing mode by reference if the node's address
1223 /// can be legally represented as pre-indexed load / store address.
1224 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1225  SDValue &Offset,
1226  ISD::MemIndexedMode &AM,
1227  SelectionDAG &DAG) const {
1228  if (DisablePPCPreinc) return false;
1229 
1230  bool isLoad = true;
1231  SDValue Ptr;
1232  EVT VT;
1233  unsigned Alignment;
1234  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1235  Ptr = LD->getBasePtr();
1236  VT = LD->getMemoryVT();
1237  Alignment = LD->getAlignment();
1238  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1239  Ptr = ST->getBasePtr();
1240  VT = ST->getMemoryVT();
1241  Alignment = ST->getAlignment();
1242  isLoad = false;
1243  } else
1244  return false;
1245 
1246  // PowerPC doesn't have preinc load/store instructions for vectors.
1247  if (VT.isVector())
1248  return false;
1249 
1250  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
1251 
1252  // Common code will reject creating a pre-inc form if the base pointer
1253  // is a frame index, or if N is a store and the base pointer is either
1254  // the same as or a predecessor of the value being stored. Check for
1255  // those situations here, and try with swapped Base/Offset instead.
1256  bool Swap = false;
1257 
1258  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
1259  Swap = true;
1260  else if (!isLoad) {
1261  SDValue Val = cast<StoreSDNode>(N)->getValue();
1262  if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
1263  Swap = true;
1264  }
1265 
1266  if (Swap)
1267  std::swap(Base, Offset);
1268 
1269  AM = ISD::PRE_INC;
1270  return true;
1271  }
1272 
1273  // LDU/STU can only handle immediates that are a multiple of 4.
1274  if (VT != MVT::i64) {
1275  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
1276  return false;
1277  } else {
1278  // LDU/STU need an address with at least 4-byte alignment.
1279  if (Alignment < 4)
1280  return false;
1281 
1282  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
1283  return false;
1284  }
1285 
1286  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1287  // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
1288  // sext i32 to i64 when addr mode is r+i.
1289  if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
1290  LD->getExtensionType() == ISD::SEXTLOAD &&
1291  isa<ConstantSDNode>(Offset))
1292  return false;
1293  }
1294 
1295  AM = ISD::PRE_INC;
1296  return true;
1297 }
1298 
1299 //===----------------------------------------------------------------------===//
1300 // LowerOperation implementation
1301 //===----------------------------------------------------------------------===//
1302 
1303 /// GetLabelAccessInfo - Return true if we should reference labels using a
1304 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
1305 static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
1306  unsigned &LoOpFlags, const GlobalValue *GV = 0) {
1307  HiOpFlags = PPCII::MO_HA;
1308  LoOpFlags = PPCII::MO_LO;
1309 
1310  // Don't use the pic base if not in PIC relocation model. Or if we are on a
1311  // non-darwin platform. We don't support PIC on other platforms yet.
1312  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
1313  TM.getSubtarget<PPCSubtarget>().isDarwin();
1314  if (isPIC) {
1315  HiOpFlags |= PPCII::MO_PIC_FLAG;
1316  LoOpFlags |= PPCII::MO_PIC_FLAG;
1317  }
1318 
1319  // If this is a reference to a global value that requires a non-lazy-ptr, make
1320  // sure that instruction lowering adds it.
1321  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
1322  HiOpFlags |= PPCII::MO_NLP_FLAG;
1323  LoOpFlags |= PPCII::MO_NLP_FLAG;
1324 
1325  if (GV->hasHiddenVisibility()) {
1326  HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1327  LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1328  }
1329  }
1330 
1331  return isPIC;
1332 }
1333 
1334 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
1335  SelectionDAG &DAG) {
1336  EVT PtrVT = HiPart.getValueType();
1337  SDValue Zero = DAG.getConstant(0, PtrVT);
1338  SDLoc DL(HiPart);
1339 
1340  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
1341  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
1342 
1343  // With PIC, the first instruction is actually "GR+hi(&G)".
1344  if (isPIC)
1345  Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
1346  DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
1347 
1348  // Generate non-pic code that has direct accesses to the constant pool.
1349  // The address of the global is just (hi(&g)+lo(&g)).
1350  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
1351 }
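// On a non-PIC target this typically materializes as an addis of the
// high-adjusted part followed by an addi, or by a memory access that folds
// the low part into its displacement, e.g.
//   addis rT, 0, sym@ha
//   addi  rD, rT, sym@l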
1352 
1353 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
1354  SelectionDAG &DAG) const {
1355  EVT PtrVT = Op.getValueType();
1356  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1357  const Constant *C = CP->getConstVal();
1358 
1359  // 64-bit SVR4 ABI code is always position-independent.
1360  // The actual address of the GlobalValue is stored in the TOC.
1361  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
1362  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
1363  return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
1364  DAG.getRegister(PPC::X2, MVT::i64));
1365  }
1366 
1367  unsigned MOHiFlag, MOLoFlag;
1368  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1369  SDValue CPIHi =
1370  DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
1371  SDValue CPILo =
1372  DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
1373  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
1374 }
1375 
1376 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1377  EVT PtrVT = Op.getValueType();
1378  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1379 
1380  // 64-bit SVR4 ABI code is always position-independent.
1381  // The actual address of the GlobalValue is stored in the TOC.
1382  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
1383  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1384  return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
1385  DAG.getRegister(PPC::X2, MVT::i64));
1386  }
1387 
1388  unsigned MOHiFlag, MOLoFlag;
1389  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1390  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
1391  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
1392  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
1393 }
1394 
1395 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
1396  SelectionDAG &DAG) const {
1397  EVT PtrVT = Op.getValueType();
1398 
1399  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1400 
1401  unsigned MOHiFlag, MOLoFlag;
1402  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1403  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
1404  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
1405  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
1406 }
1407 
1408 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1409  SelectionDAG &DAG) const {
1410 
1411  // FIXME: TLS addresses currently use medium model code sequences,
1412  // which is the most useful form. Eventually support for small and
1413  // large models could be added if users need it, at the cost of
1414  // additional complexity.
1415  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1416  SDLoc dl(GA);
1417  const GlobalValue *GV = GA->getGlobal();
1418  EVT PtrVT = getPointerTy();
1419  bool is64bit = PPCSubTarget.isPPC64();
1420 
1421  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
1422 
1423  if (Model == TLSModel::LocalExec) {
1424  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1425  PPCII::MO_TPREL_HA);
1426  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1427  PPCII::MO_TPREL_LO);
1428  SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
1429  is64bit ? MVT::i64 : MVT::i32);
1430  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
1431  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
1432  }
1433 
1434  if (!is64bit)
1435  llvm_unreachable("only local-exec is currently supported for ppc32");
1436 
1437  if (Model == TLSModel::InitialExec) {
1438  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1439  SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1440  PPCII::MO_TLS);
1441  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1442  SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
1443  PtrVT, GOTReg, TGA);
1444  SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
1445  PtrVT, TGA, TPOffsetHi);
1446  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
1447  }
1448 
1449  if (Model == TLSModel::GeneralDynamic) {
1450  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1451  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1452  SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
1453  GOTReg, TGA);
1454  SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
1455  GOTEntryHi, TGA);
1456 
1457  // We need a chain node, and don't have one handy. The underlying
1458  // call has no side effects, so using the function entry node
1459  // suffices.
1460  SDValue Chain = DAG.getEntryNode();
1461  Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
1462  SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
1463  SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
1464  PtrVT, ParmReg, TGA);
1465  // The return value from GET_TLS_ADDR really is in X3 already, but
1466  // some hacks are needed here to tie everything together. The extra
1467  // copies dissolve during subsequent transforms.
1468  Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
1469  return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
1470  }
1471 
1472  if (Model == TLSModel::LocalDynamic) {
1473  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1474  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1475  SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
1476  GOTReg, TGA);
1477  SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
1478  GOTEntryHi, TGA);
1479 
1480  // We need a chain node, and don't have one handy. The underlying
1481  // call has no side effects, so using the function entry node
1482  // suffices.
1483  SDValue Chain = DAG.getEntryNode();
1484  Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
1485  SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
1486  SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
1487  PtrVT, ParmReg, TGA);
1488  // The return value from GET_TLSLD_ADDR really is in X3 already, but
1489  // some hacks are needed here to tie everything together. The extra
1490  // copies dissolve during subsequent transforms.
1491  Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
1492  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
1493  Chain, ParmReg, TGA);
1494  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
1495  }
1496 
1497  llvm_unreachable("Unknown TLS model!");
1498 }
1499 
1500 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
1501  SelectionDAG &DAG) const {
1502  EVT PtrVT = Op.getValueType();
1503  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
1504  SDLoc DL(GSDN);
1505  const GlobalValue *GV = GSDN->getGlobal();
1506 
1507  // 64-bit SVR4 ABI code is always position-independent.
1508  // The actual address of the GlobalValue is stored in the TOC.
1509  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
1510  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
1511  return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
1512  DAG.getRegister(PPC::X2, MVT::i64));
1513  }
1514 
1515  unsigned MOHiFlag, MOLoFlag;
1516  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
1517 
1518  SDValue GAHi =
1519  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
1520  SDValue GALo =
1521  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
1522 
1523  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
1524 
1525  // If the global reference is actually to a non-lazy-pointer, we have to do an
1526  // extra load to get the address of the global.
1527  if (MOHiFlag & PPCII::MO_NLP_FLAG)
1528  Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
1529  false, false, false, 0);
1530  return Ptr;
1531 }
1532 
1533 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1534  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1535  SDLoc dl(Op);
1536 
1537  // If we're comparing for equality to zero, expose the fact that this is
1538  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
1539  // fold the new nodes.
1540  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1541  if (C->isNullValue() && CC == ISD::SETEQ) {
1542  EVT VT = Op.getOperand(0).getValueType();
1543  SDValue Zext = Op.getOperand(0);
1544  if (VT.bitsLT(MVT::i32)) {
1545  VT = MVT::i32;
1546  Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1547  }
1548  unsigned Log2b = Log2_32(VT.getSizeInBits());
1549  SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1550  SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1551  DAG.getConstant(Log2b, MVT::i32));
1552  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1553  }
1554  // Leave comparisons against 0 and -1 alone for now, since they're usually
1555  // optimized. FIXME: revisit this when we can custom lower all setcc
1556  // optimizations.
1557  if (C->isAllOnesValue() || C->isNullValue())
1558  return SDValue();
1559  }
1560 
1561  // If we have an integer seteq/setne, turn it into a compare against zero
1562  // by xor'ing the rhs with the lhs, which is faster than setting a
1563  // condition register, reading it back out, and masking the correct bit. The
1564  // normal approach here uses sub to do this instead of xor. Using xor exposes
1565  // the result to other bit-twiddling opportunities.
1566  EVT LHSVT = Op.getOperand(0).getValueType();
1567  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1568  EVT VT = Op.getValueType();
1569  SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
1570  Op.getOperand(1));
1571  return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
1572  }
1573  return SDValue();
1574 }
1575 
1576 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
1577  const PPCSubtarget &Subtarget) const {
1578  SDNode *Node = Op.getNode();
1579  EVT VT = Node->getValueType(0);
1580  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1581  SDValue InChain = Node->getOperand(0);
1582  SDValue VAListPtr = Node->getOperand(1);
1583  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1584  SDLoc dl(Node);
1585 
1586  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
1587 
1588  // gpr_index
1589  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1590  VAListPtr, MachinePointerInfo(SV), MVT::i8,
1591  false, false, 0);
1592  InChain = GprIndex.getValue(1);
1593 
1594  if (VT == MVT::i64) {
1595  // Check if GprIndex is even
1596  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
1597  DAG.getConstant(1, MVT::i32));
1598  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
1599  DAG.getConstant(0, MVT::i32), ISD::SETNE);
1600  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
1601  DAG.getConstant(1, MVT::i32));
1602  // Align GprIndex to be even if it isn't
1603  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
1604  GprIndex);
1605  }
1606 
1607  // fpr index is 1 byte after gpr
1608  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1609  DAG.getConstant(1, MVT::i32));
1610 
1611  // fpr
1612  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1613  FprPtr, MachinePointerInfo(SV), MVT::i8,
1614  false, false, 0);
1615  InChain = FprIndex.getValue(1);
1616 
1617  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1618  DAG.getConstant(8, MVT::i32));
1619 
1620  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1621  DAG.getConstant(4, MVT::i32));
1622 
1623  // areas
1624  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
1625  MachinePointerInfo(), false, false,
1626  false, 0);
1627  InChain = OverflowArea.getValue(1);
1628 
1629  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
1630  MachinePointerInfo(), false, false,
1631  false, 0);
1632  InChain = RegSaveArea.getValue(1);
1633 
1634  // select overflow_area if index > 8
1635  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
1636  DAG.getConstant(8, MVT::i32), ISD::SETLT);
1637 
1638  // adjustment constant gpr_index * 4/8
1639  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
1640  VT.isInteger() ? GprIndex : FprIndex,
1641  DAG.getConstant(VT.isInteger() ? 4 : 8,
1642  MVT::i32));
1643 
1644  // OurReg = RegSaveArea + RegConstant
1645  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
1646  RegConstant);
1647 
1648  // Floating types are 32 bytes into RegSaveArea
1649  if (VT.isFloatingPoint())
1650  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
1651  DAG.getConstant(32, MVT::i32));
1652 
1653  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
1654  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
1655  VT.isInteger() ? GprIndex : FprIndex,
1656  DAG.getConstant(VT == MVT::i64 ? 2 : 1,
1657  MVT::i32));
1658 
1659  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
1660  VT.isInteger() ? VAListPtr : FprPtr,
1661  MachinePointerInfo(SV),
1662  MVT::i8, false, false, 0);
1663 
1664  // determine if we should load from reg_save_area or overflow_area
1665  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
1666 
1667  // increase overflow_area by 4/8 if gpr/fpr > 8
1668  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
1669  DAG.getConstant(VT.isInteger() ? 4 : 8,
1670  MVT::i32));
1671 
1672  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
1673  OverflowAreaPlusN);
1674 
1675  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
1676  OverflowAreaPtr,
1677  MachinePointerInfo(),
1678  MVT::i32, false, false, 0);
1679 
1680  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
1681  false, false, false, 0);
1682 }
1683 
1684 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
1685  const PPCSubtarget &Subtarget) const {
1686  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
1687 
1688  // We have to copy the entire va_list struct:
1689  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
1690  return DAG.getMemcpy(Op.getOperand(0), Op,
1691  Op.getOperand(1), Op.getOperand(2),
1692  DAG.getConstant(12, MVT::i32), 8, false, true,
1693  MachinePointerInfo(), MachinePointerInfo());
1694 }
1695 
1696 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
1697  SelectionDAG &DAG) const {
1698  return Op.getOperand(0);
1699 }
1700 
1701 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
1702  SelectionDAG &DAG) const {
1703  SDValue Chain = Op.getOperand(0);
1704  SDValue Trmp = Op.getOperand(1); // trampoline
1705  SDValue FPtr = Op.getOperand(2); // nested function
1706  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
1707  SDLoc dl(Op);
1708 
1709  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1710  bool isPPC64 = (PtrVT == MVT::i64);
1711  Type *IntPtrTy =
1712  DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
1713  *DAG.getContext());
1714 
1715  TargetLowering::ArgListTy Args;
1716  TargetLowering::ArgListEntry Entry;
1717 
1718  Entry.Ty = IntPtrTy;
1719  Entry.Node = Trmp; Args.push_back(Entry);
1720 
1721  // TrampSize == (isPPC64 ? 48 : 40);
1722  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
1723  isPPC64 ? MVT::i64 : MVT::i32);
1724  Args.push_back(Entry);
1725 
1726  Entry.Node = FPtr; Args.push_back(Entry);
1727  Entry.Node = Nest; Args.push_back(Entry);
1728 
1729  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
1730  TargetLowering::CallLoweringInfo CLI(Chain,
1731  Type::getVoidTy(*DAG.getContext()),
1732  false, false, false, false, 0,
1733  CallingConv::C,
1734  /*isTailCall=*/false,
1735  /*doesNotRet=*/false,
1736  /*isReturnValueUsed=*/true,
1737  DAG.getExternalSymbol("__trampoline_setup", PtrVT),
1738  Args, DAG, dl);
1739  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
1740 
1741  return CallResult.second;
1742 }
1743 
1744 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
1745  const PPCSubtarget &Subtarget) const {
1746  MachineFunction &MF = DAG.getMachineFunction();
1747  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1748 
1749  SDLoc dl(Op);
1750 
1751  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
1752  // vastart just stores the address of the VarArgsFrameIndex slot into the
1753  // memory location argument.
1754  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1755  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1756  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1757  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
1758  MachinePointerInfo(SV),
1759  false, false, 0);
1760  }
1761 
1762  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
1763  // We suppose the given va_list is already allocated.
1764  //
1765  // typedef struct {
1766  // char gpr; /* index into the array of 8 GPRs
1767  // * stored in the register save area
1768  // * gpr=0 corresponds to r3,
1769  // * gpr=1 to r4, etc.
1770  // */
1771  // char fpr; /* index into the array of 8 FPRs
1772  // * stored in the register save area
1773  // * fpr=0 corresponds to f1,
1774  // * fpr=1 to f2, etc.
1775  // */
1776  // char *overflow_arg_area;
1777  // /* location on stack that holds
1778  // * the next overflow argument
1779  // */
1780  // char *reg_save_area;
1781  // /* where r3:r10 and f1:f8 (if saved)
1782  // * are stored
1783  // */
1784  // } va_list[1];
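 // Worked example (illustrative, assuming 4-byte pointers): the offsets the
 // stores below target follow directly from this layout:
 //   gpr               -> byte offset 0
 //   fpr               -> byte offset 0 + FPROffset (1)            = 1
 //   overflow_arg_area -> byte offset 1 + StackOffset (4 - 1 = 3)  = 4
 //   reg_save_area     -> byte offset 4 + FrameOffset (4)          = 8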
1785 
1786 
1787  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
1788  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
1789 
1790 
1791  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1792 
1793  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
1794  PtrVT);
1795  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1796  PtrVT);
1797 
1798  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
1799  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
1800 
1801  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
1802  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
1803 
1804  uint64_t FPROffset = 1;
1805  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
1806 
1807  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1808 
1809  // Store first byte : number of int regs
1810  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
1811  Op.getOperand(1),
1812  MachinePointerInfo(SV),
1813  MVT::i8, false, false, 0);
1814  uint64_t nextOffset = FPROffset;
1815  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
1816  ConstFPROffset);
1817 
1818  // Store second byte : number of float regs
1819  SDValue secondStore =
1820  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
1821  MachinePointerInfo(SV, nextOffset), MVT::i8,
1822  false, false, 0);
1823  nextOffset += StackOffset;
1824  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
1825 
1826  // Store second word : arguments given on stack
1827  SDValue thirdStore =
1828  DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
1829  MachinePointerInfo(SV, nextOffset),
1830  false, false, 0);
1831  nextOffset += FrameOffset;
1832  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
1833 
1834  // Store third word : arguments given in registers
1835  return DAG.getStore(thirdStore, dl, FR, nextPtr,
1836  MachinePointerInfo(SV, nextOffset),
1837  false, false, 0);
1838 
1839 }
1840 
1841 #include "PPCGenCallingConv.inc"
1842 
1843 // Function whose sole purpose is to kill compiler warnings
1844 // stemming from unused functions included from PPCGenCallingConv.inc.
1845 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
1846  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
1847 }
1848 
1849 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
1850  CCValAssign::LocInfo &LocInfo,
1851  ISD::ArgFlagsTy &ArgFlags,
1852  CCState &State) {
1853  return true;
1854 }
1855 
1856 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
1857  MVT &LocVT,
1858  CCValAssign::LocInfo &LocInfo,
1859  ISD::ArgFlagsTy &ArgFlags,
1860  CCState &State) {
1861  static const uint16_t ArgRegs[] = {
1862  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1863  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1864  };
1865  const unsigned NumArgRegs = array_lengthof(ArgRegs);
1866 
1867  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
1868 
1869  // Skip one register if the first unallocated register has an even register
1870  // number and there are still argument registers available which have not been
1871  // allocated yet. RegNum is actually an index into ArgRegs, which means we
1872  // need to skip a register if RegNum is odd.
1873  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
1874  State.AllocateReg(ArgRegs[RegNum]);
1875  }
1876 
1877  // Always return false here, as this function only makes sure that the first
1878  // unallocated register has an odd register number and does not actually
1879  // allocate a register for the current argument.
1880  return false;
1881 }
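 // Example (illustrative): for a 32-bit SVR4 call such as f(int a, long long b),
 // 'a' is assigned R3 (ArgRegs index 0). When the split i64 'b' is about to be
 // assigned, the first unallocated index is 1, i.e. the even-numbered register
 // R4, so this hook burns R4 and the two halves of 'b' end up in R5/R6.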
1882 
1883 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
1884  MVT &LocVT,
1885  CCValAssign::LocInfo &LocInfo,
1886  ISD::ArgFlagsTy &ArgFlags,
1887  CCState &State) {
1888  static const uint16_t ArgRegs[] = {
1889  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1890  PPC::F8
1891  };
1892 
1893  const unsigned NumArgRegs = array_lengthof(ArgRegs);
1894 
1895  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
1896 
1897  // If there is only one Floating-point register left we need to put both f64
1898  // values of a split ppc_fp128 value on the stack.
1899  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
1900  State.AllocateReg(ArgRegs[RegNum]);
1901  }
1902 
1903  // Always return false here, as this function only makes sure that the two f64
1904  // values a ppc_fp128 value is split into are both passed in registers or both
1905  // passed on the stack and does not actually allocate a register for the
1906  // current argument.
1907  return false;
1908 }
1909 
1910 /// GetFPR - Get the set of FP registers that should be allocated for arguments,
1911 /// on Darwin.
1912 static const uint16_t *GetFPR() {
1913  static const uint16_t FPR[] = {
1914  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1915  PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
1916  };
1917 
1918  return FPR;
1919 }
1920 
1921 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
1922 /// the stack.
1923 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
1924  unsigned PtrByteSize) {
1925  unsigned ArgSize = ArgVT.getSizeInBits()/8;
1926  if (Flags.isByVal())
1927  ArgSize = Flags.getByValSize();
1928  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1929 
1930  return ArgSize;
1931 }
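 // Worked example (illustrative): a 5-byte by-value aggregate with
 // PtrByteSize == 8 is rounded up as ((5 + 8 - 1) / 8) * 8 == 8, so it still
 // occupies one full doubleword of the parameter save area.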
1932 
1933 SDValue
1934 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
1935  CallingConv::ID CallConv, bool isVarArg,
1936  const SmallVectorImpl<ISD::InputArg>
1937  &Ins,
1938  SDLoc dl, SelectionDAG &DAG,
1939  SmallVectorImpl<SDValue> &InVals)
1940  const {
1941  if (PPCSubTarget.isSVR4ABI()) {
1942  if (PPCSubTarget.isPPC64())
1943  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
1944  dl, DAG, InVals);
1945  else
1946  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
1947  dl, DAG, InVals);
1948  } else {
1949  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
1950  dl, DAG, InVals);
1951  }
1952 }
1953 
1954 SDValue
1955 PPCTargetLowering::LowerFormalArguments_32SVR4(
1956  SDValue Chain,
1957  CallingConv::ID CallConv, bool isVarArg,
1958  const SmallVectorImpl<ISD::InputArg>
1959  &Ins,
1960  SDLoc dl, SelectionDAG &DAG,
1961  SmallVectorImpl<SDValue> &InVals) const {
1962 
1963  // 32-bit SVR4 ABI Stack Frame Layout:
1964  // +-----------------------------------+
1965  // +--> | Back chain |
1966  // | +-----------------------------------+
1967  // | | Floating-point register save area |
1968  // | +-----------------------------------+
1969  // | | General register save area |
1970  // | +-----------------------------------+
1971  // | | CR save word |
1972  // | +-----------------------------------+
1973  // | | VRSAVE save word |
1974  // | +-----------------------------------+
1975  // | | Alignment padding |
1976  // | +-----------------------------------+
1977  // | | Vector register save area |
1978  // | +-----------------------------------+
1979  // | | Local variable space |
1980  // | +-----------------------------------+
1981  // | | Parameter list area |
1982  // | +-----------------------------------+
1983  // | | LR save word |
1984  // | +-----------------------------------+
1985  // SP--> +--- | Back chain |
1986  // +-----------------------------------+
1987  //
1988  // Specifications:
1989  // System V Application Binary Interface PowerPC Processor Supplement
1990  // AltiVec Technology Programming Interface Manual
1991 
1992  MachineFunction &MF = DAG.getMachineFunction();
1993  MachineFrameInfo *MFI = MF.getFrameInfo();
1994  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1995 
1996  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1997  // Potential tail calls could cause overwriting of argument stack slots.
1998  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
1999  (CallConv == CallingConv::Fast));
2000  unsigned PtrByteSize = 4;
2001 
2002  // Assign locations to all of the incoming arguments.
2003  SmallVector<CCValAssign, 16> ArgLocs;
2004  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2005  getTargetMachine(), ArgLocs, *DAG.getContext());
2006 
2007  // Reserve space for the linkage area on the stack.
2008  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
2009 
2010  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2011 
2012  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2013  CCValAssign &VA = ArgLocs[i];
2014 
2015  // Arguments stored in registers.
2016  if (VA.isRegLoc()) {
2017  const TargetRegisterClass *RC;
2018  EVT ValVT = VA.getValVT();
2019 
2020  switch (ValVT.getSimpleVT().SimpleTy) {
2021  default:
2022  llvm_unreachable("ValVT not supported by formal arguments Lowering");
2023  case MVT::i32:
2024  RC = &PPC::GPRCRegClass;
2025  break;
2026  case MVT::f32:
2027  RC = &PPC::F4RCRegClass;
2028  break;
2029  case MVT::f64:
2030  RC = &PPC::F8RCRegClass;
2031  break;
2032  case MVT::v16i8:
2033  case MVT::v8i16:
2034  case MVT::v4i32:
2035  case MVT::v4f32:
2036  RC = &PPC::VRRCRegClass;
2037  break;
2038  }
2039 
2040  // Transform the arguments stored in physical registers into virtual ones.
2041  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2042  SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
2043 
2044  InVals.push_back(ArgValue);
2045  } else {
2046  // Argument stored in memory.
2047  assert(VA.isMemLoc());
2048 
2049  unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
2050  int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2051  isImmutable);
2052 
2053  // Create load nodes to retrieve arguments from the stack.
2054  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2055  InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2056  MachinePointerInfo(),
2057  false, false, false, 0));
2058  }
2059  }
2060 
2061  // Assign locations to all of the incoming aggregate by value arguments.
2062  // Aggregates passed by value are stored in the local variable space of the
2063  // caller's stack frame, right above the parameter list area.
2064  SmallVector<CCValAssign, 16> ByValArgLocs;
2065  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2066  getTargetMachine(), ByValArgLocs, *DAG.getContext());
2067 
2068  // Reserve stack space for the allocations in CCInfo.
2069  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2070 
2071  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
2072 
2073  // Area that is at least reserved in the caller of this function.
2074  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
2075 
2076  // Set the size that is at least reserved in caller of this function. Tail
2077  // call optimized function's reserved stack space needs to be aligned so that
2078  // taking the difference between two stack areas will result in an aligned
2079  // stack.
2080  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
2081 
2082  MinReservedArea =
2083  std::max(MinReservedArea,
2084  PPCFrameLowering::getMinCallFrameSize(false, false));
2085 
2086  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
2087  getStackAlignment();
2088  unsigned AlignMask = TargetAlign-1;
2089  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
2090 
2091  FI->setMinReservedArea(MinReservedArea);
2092 
2093  SmallVector<SDValue, 8> MemOps;
2094 
2095  // If the function takes variable number of arguments, make a frame index for
2096  // the start of the first vararg value... for expansion of llvm.va_start.
2097  if (isVarArg) {
2098  static const uint16_t GPArgRegs[] = {
2099  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2100  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2101  };
2102  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
2103 
2104  static const uint16_t FPArgRegs[] = {
2105  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2106  PPC::F8
2107  };
2108  const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
2109 
2110  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
2111  NumGPArgRegs));
2112  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
2113  NumFPArgRegs));
2114 
2115  // Make room for NumGPArgRegs and NumFPArgRegs.
2116  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
2117  NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
2118 
2119  FuncInfo->setVarArgsStackOffset(
2120  MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2121  CCInfo.getNextStackOffset(), true));
2122 
2123  FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
2124  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2125 
2126  // The fixed integer arguments of a variadic function are stored to the
2127  // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
2128  // the result of va_next.
2129  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
2130  // Get an existing live-in vreg, or add a new one.
2131  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
2132  if (!VReg)
2133  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
2134 
2135  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2136  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2137  MachinePointerInfo(), false, false, 0);
2138  MemOps.push_back(Store);
2139  // Increment the address by four for the next argument to store
2140  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2141  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2142  }
2143 
2144  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
2145  // is set.
2146  // The double arguments are stored to the VarArgsFrameIndex
2147  // on the stack.
2148  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
2149  // Get an existing live-in vreg, or add a new one.
2150  unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
2151  if (!VReg)
2152  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
2153 
2154  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
2155  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2156  MachinePointerInfo(), false, false, 0);
2157  MemOps.push_back(Store);
2158  // Increment the address by eight for the next argument to store
2159  SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
2160  PtrVT);
2161  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2162  }
2163  }
2164 
2165  if (!MemOps.empty())
2166  Chain = DAG.getNode(ISD::TokenFactor, dl,
2167  MVT::Other, &MemOps[0], MemOps.size());
2168 
2169  return Chain;
2170 }
2171 
2172 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2173 // value to MVT::i64 and then truncate to the correct register size.
2174 SDValue
2175 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
2176  SelectionDAG &DAG, SDValue ArgVal,
2177  SDLoc dl) const {
2178  if (Flags.isSExt())
2179  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2180  DAG.getValueType(ObjectVT));
2181  else if (Flags.isZExt())
2182  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2183  DAG.getValueType(ObjectVT));
2184 
2185  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
2186 }
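 // Sketch of the resulting DAG (illustrative): for an i32 argument marked
 // signext that arrives in an i64 GPR, the chain built here is roughly
 //   trunc:i32 (AssertSext:i64 (CopyFromReg:i64 %vreg), i32)
 // telling later passes that the upper 32 bits already hold the sign bits.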
2187 
2188 // Set the size that is at least reserved in caller of this function. Tail
2189 // call optimized functions' reserved stack space needs to be aligned so that
2190 // taking the difference between two stack areas will result in an aligned
2191 // stack.
2192 void
2193 PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
2194  unsigned nAltivecParamsAtEnd,
2195  unsigned MinReservedArea,
2196  bool isPPC64) const {
2197  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
2198  // Add the Altivec parameters at the end, if needed.
2199  if (nAltivecParamsAtEnd) {
2200  MinReservedArea = ((MinReservedArea+15)/16)*16;
2201  MinReservedArea += 16*nAltivecParamsAtEnd;
2202  }
2203  MinReservedArea =
2204  std::max(MinReservedArea,
2205  PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
2206  unsigned TargetAlign
2207  = DAG.getMachineFunction().getTarget().getFrameLowering()->
2208  getStackAlignment();
2209  unsigned AlignMask = TargetAlign-1;
2210  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
2211  FI->setMinReservedArea(MinReservedArea);
2212 }
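 // Worked example (illustrative): with a 16-byte stack alignment, AlignMask is
 // 15, so a MinReservedArea of 52 becomes (52 + 15) & ~15 == 64, the next
 // multiple of 16.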
2213 
2214 SDValue
2215 PPCTargetLowering::LowerFormalArguments_64SVR4(
2216  SDValue Chain,
2217  CallingConv::ID CallConv, bool isVarArg,
2218  const SmallVectorImpl<ISD::InputArg>
2219  &Ins,
2220  SDLoc dl, SelectionDAG &DAG,
2221  SmallVectorImpl<SDValue> &InVals) const {
2222  // TODO: add description of PPC stack frame format, or at least some docs.
2223  //
2224  MachineFunction &MF = DAG.getMachineFunction();
2225  MachineFrameInfo *MFI = MF.getFrameInfo();
2226  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2227 
2228  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2229  // Potential tail calls could cause overwriting of argument stack slots.
2230  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2231  (CallConv == CallingConv::Fast));
2232  unsigned PtrByteSize = 8;
2233 
2234  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
2235  // Area that is at least reserved in caller of this function.
2236  unsigned MinReservedArea = ArgOffset;
2237 
2238  static const uint16_t GPR[] = {
2239  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2240  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2241  };
2242 
2243  static const uint16_t *FPR = GetFPR();
2244 
2245  static const uint16_t VR[] = {
2246  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2247  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2248  };
2249 
2250  const unsigned Num_GPR_Regs = array_lengthof(GPR);
2251  const unsigned Num_FPR_Regs = 13;
2252  const unsigned Num_VR_Regs = array_lengthof(VR);
2253 
2254  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2255 
2256  // Add DAG nodes to load the arguments or copy them out of registers. On
2257  // entry to a function on PPC, the arguments start after the linkage area,
2258  // although the first ones are often in registers.
2259 
2260  SmallVector<SDValue, 8> MemOps;
2261  unsigned nAltivecParamsAtEnd = 0;
2262  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
2263  unsigned CurArgIdx = 0;
2264  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
2265  SDValue ArgVal;
2266  bool needsLoad = false;
2267  EVT ObjectVT = Ins[ArgNo].VT;
2268  unsigned ObjSize = ObjectVT.getSizeInBits()/8;
2269  unsigned ArgSize = ObjSize;
2270  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2271  std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
2272  CurArgIdx = Ins[ArgNo].OrigArgIndex;
2273 
2274  unsigned CurArgOffset = ArgOffset;
2275 
2276  // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
2277  if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
2278  ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
2279  if (isVarArg) {
2280  MinReservedArea = ((MinReservedArea+15)/16)*16;
2281  MinReservedArea += CalculateStackSlotSize(ObjectVT,
2282  Flags,
2283  PtrByteSize);
2284  } else
2285  nAltivecParamsAtEnd++;
2286  } else
2287  // Calculate min reserved area.
2288  MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
2289  Flags,
2290  PtrByteSize);
2291 
2292  // FIXME the codegen can be much improved in some cases.
2293  // We do not have to keep everything in memory.
2294  if (Flags.isByVal()) {
2295  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
2296  ObjSize = Flags.getByValSize();
2297  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2298  // Empty aggregate parameters do not take up registers. Examples:
2299  // struct { } a;
2300  // union { } b;
2301  // int c[0];
2302  // etc. However, we have to provide a place-holder in InVals, so
2303  // pretend we have an 8-byte item at the current address for that
2304  // purpose.
2305  if (!ObjSize) {
2306  int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2307  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2308  InVals.push_back(FIN);
2309  continue;
2310  }
2311 
2312  unsigned BVAlign = Flags.getByValAlign();
2313  if (BVAlign > 8) {
2314  ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
2315  CurArgOffset = ArgOffset;
2316  }
2317 
2318  // All aggregates smaller than 8 bytes must be passed right-justified.
2319  if (ObjSize < PtrByteSize)
2320  CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
2321  // The value of the object is its address.
2322  int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
2323  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2324  InVals.push_back(FIN);
2325 
2326  if (ObjSize < 8) {
2327  if (GPR_idx != Num_GPR_Regs) {
2328  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2329  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2330  SDValue Store;
2331 
2332  if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
2333  EVT ObjType = (ObjSize == 1 ? MVT::i8 :
2334  (ObjSize == 2 ? MVT::i16 : MVT::i32));
2335  Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
2336  MachinePointerInfo(FuncArg, CurArgOffset),
2337  ObjType, false, false, 0);
2338  } else {
2339  // For sizes that don't fit a truncating store (3, 5, 6, 7),
2340  // store the whole register as-is to the parameter save area
2341  // slot. The address of the parameter was already calculated
2342  // above (InVals.push_back(FIN)) to be the right-justified
2343  // offset within the slot. For this store, we need a new
2344  // frame index that points at the beginning of the slot.
2345  int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2346  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2347  Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2348  MachinePointerInfo(FuncArg, ArgOffset),
2349  false, false, 0);
2350  }
2351 
2352  MemOps.push_back(Store);
2353  ++GPR_idx;
2354  }
2355  // Whether we copied from a register or not, advance the offset
2356  // into the parameter save area by a full doubleword.
2357  ArgOffset += PtrByteSize;
2358  continue;
2359  }
2360 
2361  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
2362  // Store whatever pieces of the object are in registers
2363  // to memory. ArgOffset will be the address of the beginning
2364  // of the object.
2365  if (GPR_idx != Num_GPR_Regs) {
2366  unsigned VReg;
2367  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2368  int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2369  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2370  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2371  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2372  MachinePointerInfo(FuncArg, ArgOffset),
2373  false, false, 0);
2374  MemOps.push_back(Store);
2375  ++GPR_idx;
2376  ArgOffset += PtrByteSize;
2377  } else {
2378  ArgOffset += ArgSize - j;
2379  break;
2380  }
2381  }
2382  continue;
2383  }
2384 
2385  switch (ObjectVT.getSimpleVT().SimpleTy) {
2386  default: llvm_unreachable("Unhandled argument type!");
2387  case MVT::i32:
2388  case MVT::i64:
2389  if (GPR_idx != Num_GPR_Regs) {
2390  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2391  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
2392 
2393  if (ObjectVT == MVT::i32)
2394  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2395  // value to MVT::i64 and then truncate to the correct register size.
2396  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
2397 
2398  ++GPR_idx;
2399  } else {
2400  needsLoad = true;
2401  ArgSize = PtrByteSize;
2402  }
2403  ArgOffset += 8;
2404  break;
2405 
2406  case MVT::f32:
2407  case MVT::f64:
2408  // Every 8 bytes of argument space consumes one of the GPRs available for
2409  // argument passing.
2410  if (GPR_idx != Num_GPR_Regs) {
2411  ++GPR_idx;
2412  }
2413  if (FPR_idx != Num_FPR_Regs) {
2414  unsigned VReg;
2415 
2416  if (ObjectVT == MVT::f32)
2417  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
2418  else
2419  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
2420 
2421  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2422  ++FPR_idx;
2423  } else {
2424  needsLoad = true;
2425  ArgSize = PtrByteSize;
2426  }
2427 
2428  ArgOffset += 8;
2429  break;
2430  case MVT::v4f32:
2431  case MVT::v4i32:
2432  case MVT::v8i16:
2433  case MVT::v16i8:
2434  // Note that vector arguments in registers don't reserve stack space,
2435  // except in varargs functions.
2436  if (VR_idx != Num_VR_Regs) {
2437  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
2438  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2439  if (isVarArg) {
2440  while ((ArgOffset % 16) != 0) {
2441  ArgOffset += PtrByteSize;
2442  if (GPR_idx != Num_GPR_Regs)
2443  GPR_idx++;
2444  }
2445  ArgOffset += 16;
2446  GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
2447  }
2448  ++VR_idx;
2449  } else {
2450  // Vectors are aligned.
2451  ArgOffset = ((ArgOffset+15)/16)*16;
2452  CurArgOffset = ArgOffset;
2453  ArgOffset += 16;
2454  needsLoad = true;
2455  }
2456  break;
2457  }
2458 
2459  // We need to load the argument to a virtual register if we determined
2460  // above that we ran out of physical registers of the appropriate type.
2461  if (needsLoad) {
2462  int FI = MFI->CreateFixedObject(ObjSize,
2463  CurArgOffset + (ArgSize - ObjSize),
2464  isImmutable);
2465  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2466  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
2467  false, false, false, 0);
2468  }
2469 
2470  InVals.push_back(ArgVal);
2471  }
2472 
2473  // Set the size that is at least reserved in caller of this function. Tail
2474  // call optimized functions' reserved stack space needs to be aligned so that
2475  // taking the difference between two stack areas will result in an aligned
2476  // stack.
2477  setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
2478 
2479  // If the function takes variable number of arguments, make a frame index for
2480  // the start of the first vararg value... for expansion of llvm.va_start.
2481  if (isVarArg) {
2482  int Depth = ArgOffset;
2483 
2484  FuncInfo->setVarArgsFrameIndex(
2485  MFI->CreateFixedObject(PtrByteSize, Depth, true));
2486  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2487 
2488  // If this function is vararg, store any remaining integer argument regs
2489  // to their spots on the stack so that they may be loaded by dereferencing the
2490  // result of va_next.
2491  for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
2492  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2493  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2494  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2495  MachinePointerInfo(), false, false, 0);
2496  MemOps.push_back(Store);
2497  // Increment the address by four for the next argument to store
2498  SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
2499  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2500  }
2501  }
2502 
2503  if (!MemOps.empty())
2504  Chain = DAG.getNode(ISD::TokenFactor, dl,
2505  MVT::Other, &MemOps[0], MemOps.size());
2506 
2507  return Chain;
2508 }
2509 
2510 SDValue
2511 PPCTargetLowering::LowerFormalArguments_Darwin(
2512  SDValue Chain,
2513  CallingConv::ID CallConv, bool isVarArg,
2514  const SmallVectorImpl<ISD::InputArg>
2515  &Ins,
2516  SDLoc dl, SelectionDAG &DAG,
2517  SmallVectorImpl<SDValue> &InVals) const {
2518  // TODO: add description of PPC stack frame format, or at least some docs.
2519  //
2520  MachineFunction &MF = DAG.getMachineFunction();
2521  MachineFrameInfo *MFI = MF.getFrameInfo();
2522  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2523 
2524  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2525  bool isPPC64 = PtrVT == MVT::i64;
2526  // Potential tail calls could cause overwriting of argument stack slots.
2527  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2528  (CallConv == CallingConv::Fast));
2529  unsigned PtrByteSize = isPPC64 ? 8 : 4;
2530 
2531  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
2532  // Area that is at least reserved in caller of this function.
2533  unsigned MinReservedArea = ArgOffset;
2534 
2535  static const uint16_t GPR_32[] = { // 32-bit registers.
2536  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2537  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2538  };
2539  static const uint16_t GPR_64[] = { // 64-bit registers.
2540  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2541  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2542  };
2543 
2544  static const uint16_t *FPR = GetFPR();
2545 
2546  static const uint16_t VR[] = {
2547  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2548  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2549  };
2550 
2551  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
2552  const unsigned Num_FPR_Regs = 13;
2553  const unsigned Num_VR_Regs = array_lengthof( VR);
2554 
2555  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2556 
2557  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
2558 
2559  // In 32-bit non-varargs functions, the stack space for vectors is after the
2560  // stack space for non-vectors. We do not use this space unless we have
2561  // too many vectors to fit in registers, something that only occurs in
2562  // constructed examples:), but we have to walk the arglist to figure
2563  // that out...for the pathological case, compute VecArgOffset as the
2564  // start of the vector parameter area. Computing VecArgOffset is the
2565  // entire point of the following loop.
2566  unsigned VecArgOffset = ArgOffset;
2567  if (!isVarArg && !isPPC64) {
2568  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
2569  ++ArgNo) {
2570  EVT ObjectVT = Ins[ArgNo].VT;
2571  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2572 
2573  if (Flags.isByVal()) {
2574  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
2575  unsigned ObjSize = Flags.getByValSize();
2576  unsigned ArgSize =
2577  ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2578  VecArgOffset += ArgSize;
2579  continue;
2580  }
2581 
2582  switch(ObjectVT.getSimpleVT().SimpleTy) {
2583  default: llvm_unreachable("Unhandled argument type!");
2584  case MVT::i32:
2585  case MVT::f32:
2586  VecArgOffset += 4;
2587  break;
2588  case MVT::i64: // PPC64
2589  case MVT::f64:
2590  // FIXME: We are guaranteed to be !isPPC64 at this point.
2591  // Does MVT::i64 apply?
2592  VecArgOffset += 8;
2593  break;
2594  case MVT::v4f32:
2595  case MVT::v4i32:
2596  case MVT::v8i16:
2597  case MVT::v16i8:
2598  // Nothing to do, we're only looking at Nonvector args here.
2599  break;
2600  }
2601  }
2602  }
2603  // We've found where the vector parameter area in memory is. Skip the
2604  // first 12 parameters; these don't use that memory.
2605  VecArgOffset = ((VecArgOffset+15)/16)*16;
2606  VecArgOffset += 12*16;
2607 
2608  // Add DAG nodes to load the arguments or copy them out of registers. On
2609  // entry to a function on PPC, the arguments start after the linkage area,
2610  // although the first ones are often in registers.
2611 
2612  SmallVector<SDValue, 8> MemOps;
2613  unsigned nAltivecParamsAtEnd = 0;
2614  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
2615  unsigned CurArgIdx = 0;
2616  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
2617  SDValue ArgVal;
2618  bool needsLoad = false;
2619  EVT ObjectVT = Ins[ArgNo].VT;
2620  unsigned ObjSize = ObjectVT.getSizeInBits()/8;
2621  unsigned ArgSize = ObjSize;
2622  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2623  std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
2624  CurArgIdx = Ins[ArgNo].OrigArgIndex;
2625 
2626  unsigned CurArgOffset = ArgOffset;
2627 
2628  // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
2629  if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
2630  ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
2631  if (isVarArg || isPPC64) {
2632  MinReservedArea = ((MinReservedArea+15)/16)*16;
2633  MinReservedArea += CalculateStackSlotSize(ObjectVT,
2634  Flags,
2635  PtrByteSize);
2636  } else nAltivecParamsAtEnd++;
2637  } else
2638  // Calculate min reserved area.
2639  MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
2640  Flags,
2641  PtrByteSize);
2642 
2643  // FIXME the codegen can be much improved in some cases.
2644  // We do not have to keep everything in memory.
2645  if (Flags.isByVal()) {
2646  // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
2647  ObjSize = Flags.getByValSize();
2648  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2649  // Objects of size 1 and 2 are right justified, everything else is
2650  // left justified. This means the memory address is adjusted forwards.
2651  if (ObjSize==1 || ObjSize==2) {
2652  CurArgOffset = CurArgOffset + (4 - ObjSize);
2653  }
2654  // The value of the object is its address.
2655  int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
2656  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2657  InVals.push_back(FIN);
2658  if (ObjSize==1 || ObjSize==2) {
2659  if (GPR_idx != Num_GPR_Regs) {
2660  unsigned VReg;
2661  if (isPPC64)
2662  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2663  else
2664  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2665  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2666  EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
2667  SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
2668  MachinePointerInfo(FuncArg,
2669  CurArgOffset),
2670  ObjType, false, false, 0);
2671  MemOps.push_back(Store);
2672  ++GPR_idx;
2673  }
2674 
2675  ArgOffset += PtrByteSize;
2676 
2677  continue;
2678  }
2679  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
2680  // Store whatever pieces of the object are in registers
2681  // to memory. ArgOffset will be the address of the beginning
2682  // of the object.
2683  if (GPR_idx != Num_GPR_Regs) {
2684  unsigned VReg;
2685  if (isPPC64)
2686  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2687  else
2688  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2689  int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2690  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2691  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2692  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2693  MachinePointerInfo(FuncArg, ArgOffset),
2694  false, false, 0);
2695  MemOps.push_back(Store);
2696  ++GPR_idx;
2697  ArgOffset += PtrByteSize;
2698  } else {
2699  ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
2700  break;
2701  }
2702  }
2703  continue;
2704  }
2705 
2706  switch (ObjectVT.getSimpleVT().SimpleTy) {
2707  default: llvm_unreachable("Unhandled argument type!");
2708  case MVT::i32:
2709  if (!isPPC64) {
2710  if (GPR_idx != Num_GPR_Regs) {
2711  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2712  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2713  ++GPR_idx;
2714  } else {
2715  needsLoad = true;
2716  ArgSize = PtrByteSize;
2717  }
2718  // All int arguments reserve stack space in the Darwin ABI.
2719  ArgOffset += PtrByteSize;
2720  break;
2721  }
2722  // FALLTHROUGH
2723  case MVT::i64: // PPC64
2724  if (GPR_idx != Num_GPR_Regs) {
2725  unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2726  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
2727 
2728  if (ObjectVT == MVT::i32)
2729  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2730  // value to MVT::i64 and then truncate to the correct register size.
2731  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
2732 
2733  ++GPR_idx;
2734  } else {
2735  needsLoad = true;
2736  ArgSize = PtrByteSize;
2737  }
2738  // All int arguments reserve stack space in the Darwin ABI.
2739  ArgOffset += 8;
2740  break;
2741 
2742  case MVT::f32:
2743  case MVT::f64:
2744  // Every 4 bytes of argument space consumes one of the GPRs available for
2745  // argument passing.
2746  if (GPR_idx != Num_GPR_Regs) {
2747  ++GPR_idx;
2748  if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
2749  ++GPR_idx;
2750  }
2751  if (FPR_idx != Num_FPR_Regs) {
2752  unsigned VReg;
2753 
2754  if (ObjectVT == MVT::f32)
2755  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
2756  else
2757  VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
2758 
2759  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2760  ++FPR_idx;
2761  } else {
2762  needsLoad = true;
2763  }
2764 
2765  // All FP arguments reserve stack space in the Darwin ABI.
2766  ArgOffset += isPPC64 ? 8 : ObjSize;
2767  break;
2768  case MVT::v4f32:
2769  case MVT::v4i32:
2770  case MVT::v8i16:
2771  case MVT::v16i8:
2772  // Note that vector arguments in registers don't reserve stack space,
2773  // except in varargs functions.
2774  if (VR_idx != Num_VR_Regs) {
2775  unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
2776  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2777  if (isVarArg) {
2778  while ((ArgOffset % 16) != 0) {
2779  ArgOffset += PtrByteSize;
2780  if (GPR_idx != Num_GPR_Regs)
2781  GPR_idx++;
2782  }
2783  ArgOffset += 16;
2784  GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
2785  }
2786  ++VR_idx;
2787  } else {
2788  if (!isVarArg && !isPPC64) {
2789  // Vectors go after all the nonvectors.
2790  CurArgOffset = VecArgOffset;
2791  VecArgOffset += 16;
2792  } else {
2793  // Vectors are aligned.
2794  ArgOffset = ((ArgOffset+15)/16)*16;
2795  CurArgOffset = ArgOffset;
2796  ArgOffset += 16;
2797  }
2798  needsLoad = true;
2799  }
2800  break;
2801  }
2802 
2803  // We need to load the argument to a virtual register if we determined above
2804  // that we ran out of physical registers of the appropriate type.
2805  if (needsLoad) {
2806  int FI = MFI->CreateFixedObject(ObjSize,
2807  CurArgOffset + (ArgSize - ObjSize),
2808  isImmutable);
2809  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2810  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
2811  false, false, false, 0);
2812  }
2813 
2814  InVals.push_back(ArgVal);
2815  }
2816 
2817  // Set the size that is at least reserved in caller of this function. Tail
2818  // call optimized functions' reserved stack space needs to be aligned so that
2819  // taking the difference between two stack areas will result in an aligned
2820  // stack.
2821  setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
2822 
2823  // If the function takes variable number of arguments, make a frame index for
2824  // the start of the first vararg value... for expansion of llvm.va_start.
2825  if (isVarArg) {
2826  int Depth = ArgOffset;
2827 
2828  FuncInfo->setVarArgsFrameIndex(
2829  MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2830  Depth, true));
2831  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2832 
2833  // If this function is vararg, store any remaining integer argument regs
2834  // to their spots on the stack so that they may be loaded by dereferencing the
2835  // result of va_next.
2836  for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
2837  unsigned VReg;
2838 
2839  if (isPPC64)
2840  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2841  else
2842  VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2843 
2844  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2845  SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2846  MachinePointerInfo(), false, false, 0);
2847  MemOps.push_back(Store);
2848  // Increment the address by four for the next argument to store
2849  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2850  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2851  }
2852  }
2853 
2854  if (!MemOps.empty())
2855  Chain = DAG.getNode(ISD::TokenFactor, dl,
2856  MVT::Other, &MemOps[0], MemOps.size());
2857 
2858  return Chain;
2859 }
2860 
2861 /// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
2862 /// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
2863 static unsigned
2864 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
2865  bool isPPC64,
2866  bool isVarArg,
2867  unsigned CC,
2868  const SmallVectorImpl<ISD::OutputArg>
2869  &Outs,
2870  const SmallVectorImpl<SDValue> &OutVals,
2871  unsigned &nAltivecParamsAtEnd) {
2872  // Count how many bytes are to be pushed on the stack, including the linkage
2873  // area, and parameter passing area. We start with 24/48 bytes, which is
2874  // prereserved space for [SP][CR][LR][3 x unused].
2875  unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
2876  unsigned NumOps = Outs.size();
2877  unsigned PtrByteSize = isPPC64 ? 8 : 4;
2878 
2879  // Add up all the space actually used.
2880  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
2881  // they all go in registers, but we must reserve stack space for them for
2882  // possible use by the caller. In varargs or 64-bit calls, parameters are
2883  // assigned stack space in order, with padding so Altivec parameters are
2884  // 16-byte aligned.
2885  nAltivecParamsAtEnd = 0;
2886  for (unsigned i = 0; i != NumOps; ++i) {
2887  ISD::ArgFlagsTy Flags = Outs[i].Flags;
2888  EVT ArgVT = Outs[i].VT;
2889  // Varargs Altivec parameters are padded to a 16 byte boundary.
2890  if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
2891  ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
2892  if (!isVarArg && !isPPC64) {
2893  // Non-varargs Altivec parameters go after all the non-Altivec
2894  // parameters; handle those later so we know how much padding we need.
2895  nAltivecParamsAtEnd++;
2896  continue;
2897  }
2898  // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
2899  NumBytes = ((NumBytes+15)/16)*16;
2900  }
2901  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2902  }
2903 
2904  // Allow for Altivec parameters at the end, if needed.
2905  if (nAltivecParamsAtEnd) {
2906  NumBytes = ((NumBytes+15)/16)*16;
2907  NumBytes += 16*nAltivecParamsAtEnd;
2908  }
2909 
2910  // The prolog code of the callee may store up to 8 GPR argument registers to
2911  // the stack, allowing va_start to index over them in memory if it is varargs.
2912  // Because we cannot tell if this is needed on the caller side, we have to
2913  // conservatively assume that it is needed. As such, make sure we have at
2914  // least enough stack space for the caller to store the 8 GPRs.
2915  NumBytes = std::max(NumBytes,
2916  PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
2917 
2918  // Tail call needs the stack to be aligned.
2919  if (DAG.getMachineFunction().getInfo<PPCFunctionInfo>()->hasFastCall()) {
2920  unsigned TargetAlign = DAG.getMachineFunction().getTarget().
2921  getFrameLowering()->getStackAlignment();
2922  unsigned AlignMask = TargetAlign-1;
2923  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2924  }
2925 
2926  return NumBytes;
2927 }
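 // Worked example (illustrative, 64-bit): the linkage area contributes 48
 // bytes; three i64 arguments add 3 * 8 = 24 bytes, for a running total of 72;
 // the std::max against the minimum call frame size (here assumed to be the
 // 48-byte linkage area plus the 8 * 8 = 64-byte GPR home area, i.e. 112
 // bytes) then raises the result to 112.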
2928 
2929 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
2930 /// adjusted to accommodate the arguments for the tailcall.
2931 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
2932  unsigned ParamSize) {
2933 
2934  if (!isTailCall) return 0;
2935 
2936  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
2937  unsigned CallerMinReservedArea = FI->getMinReservedArea();
2938  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
2939  // Remember only if the new adjustment is bigger.
2940  if (SPDiff < FI->getTailCallSPDelta())
2941  FI->setTailCallSPDelta(SPDiff);
2942 
2943  return SPDiff;
2944 }
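 // Example (illustrative): if the caller reserved 112 bytes and this call's
 // ParamSize is 96, SPDiff is 112 - 96 = 16; a negative SPDiff means the
 // callee needs more room than the caller reserved, and only the smallest
 // (most negative) delta seen so far is recorded.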
2945 
2946 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
2947 /// for tail call optimization. Targets which want to do tail call
2948 /// optimization should implement this function.
2949 bool
2950 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2951  CallingConv::ID CalleeCC,
2952  bool isVarArg,
2953  const SmallVectorImpl<ISD::InputArg> &Ins,
2954  SelectionDAG& DAG) const {
2955  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
2956  return false;
2957 
2958  // Variable argument functions are not supported.
2959  if (isVarArg)
2960  return false;
2961 
2962  MachineFunction &MF = DAG.getMachineFunction();
2963  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
2964  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
2965  // Functions containing by val parameters are not supported.
2966  for (unsigned i = 0; i != Ins.size(); i++) {
2967  ISD::ArgFlagsTy Flags = Ins[i].Flags;
2968  if (Flags.isByVal()) return false;
2969  }
2970 
2971  // Non PIC/GOT tail calls are supported.
2972  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
2973  return true;
2974 
2975  // At the moment we can only do local tail calls (in same module, hidden
2976  // or protected) if we are generating PIC.
2977  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2978  return G->getGlobal()->hasHiddenVisibility()
2979  || G->getGlobal()->hasProtectedVisibility();
2980  }
2981 
2982  return false;
2983 }
2984 
2985 /// isBLACompatibleAddress - Return the immediate to use if the specified
2986 /// 32-bit value is representable in the immediate field of a BxA instruction.
2987 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
2988  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
2989  if (!C) return 0;
2990 
2991  int Addr = C->getZExtValue();
2992  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
2993  SignExtend32<26>(Addr) != Addr)
2994  return 0; // Top 6 bits have to be sext of immediate.
2995 
2996  return DAG.getConstant((int)C->getZExtValue() >> 2,
2997  DAG.getTargetLoweringInfo().getPointerTy()).getNode();
2998 }
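 // Worked example (illustrative): an absolute callee address of 0x2000 has its
 // low two bits clear and sign-extends from 26 bits back to itself, so it is
 // encodable; the node returned wraps 0x2000 >> 2 == 0x800, the value placed
 // in the BLA immediate field.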
2999 
3000 namespace {
3001 
3002 struct TailCallArgumentInfo {
3003  SDValue Arg;
3004  SDValue FrameIdxOp;
3005  int FrameIdx;
3006 
3007  TailCallArgumentInfo() : FrameIdx(0) {}
3008 };
3009 
3010 }
3011 
3012 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
3013 static void
3014 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
3015  SDValue Chain,
3016  const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
3017  SmallVectorImpl<SDValue> &MemOpChains,
3018  SDLoc dl) {
3019  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
3020  SDValue Arg = TailCallArgs[i].Arg;
3021  SDValue FIN = TailCallArgs[i].FrameIdxOp;
3022  int FI = TailCallArgs[i].FrameIdx;
3023  // Store relative to framepointer.
3024  MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
3025  MachinePointerInfo::getFixedStack(FI),
3026  false, false, 0));
3027  }
3028 }
3029 
3030 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
3031 /// the appropriate stack slot for the tail call optimized function call.
3032 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
3033  MachineFunction &MF,
3034  SDValue Chain,
3035  SDValue OldRetAddr,
3036  SDValue OldFP,
3037  int SPDiff,
3038  bool isPPC64,
3039  bool isDarwinABI,
3040  SDLoc dl) {
3041  if (SPDiff) {
3042  // Calculate the new stack slot for the return address.
3043  int SlotSize = isPPC64 ? 8 : 4;
3044  int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
3045  isDarwinABI);
3046  int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
3047  NewRetAddrLoc, true);
3048  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3049  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
3050  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
3051  MachinePointerInfo::getFixedStack(NewRetAddr),
3052  false, false, 0);
3053 
3054  // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
3055  // slot as the FP is never overwritten.
3056  if (isDarwinABI) {
3057  int NewFPLoc =
3058  SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
3059  int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
3060  true);
3061  SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
3062  Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
3063  MachinePointerInfo::getFixedStack(NewFPIdx),
3064  false, false, 0);
3065  }
3066  }
3067  return Chain;
3068 }
3069 
3070 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
3071 /// the position of the argument.
3072 static void
3073 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
3074  SDValue Arg, int SPDiff, unsigned ArgOffset,
3075  SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
3076  int Offset = ArgOffset + SPDiff;
3077  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
3078  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3079  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3080  SDValue FIN = DAG.getFrameIndex(FI, VT);
3081  TailCallArgumentInfo Info;
3082  Info.Arg = Arg;
3083  Info.FrameIdxOp = FIN;
3084  Info.FrameIdx = FI;
3085  TailCallArguments.push_back(Info);
3086 }
3087 
3088 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return address
3089 /// stack slot. Returns the chain as result and the loaded frame pointers in
3090 /// LROpOut/FPOpout. Used when tail calling.
3091 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
3092  int SPDiff,
3093  SDValue Chain,
3094  SDValue &LROpOut,
3095  SDValue &FPOpOut,
3096  bool isDarwinABI,
3097  SDLoc dl) const {
3098  if (SPDiff) {
3099  // Load the LR and FP stack slot for later adjusting.
3100  EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
3101  LROpOut = getReturnAddrFrameIndex(DAG);
3102  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
3103  false, false, false, 0);
3104  Chain = SDValue(LROpOut.getNode(), 1);
3105 
3106  // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
3107  // slot as the FP is never overwritten.
3108  if (isDarwinABI) {
3109  FPOpOut = getFramePointerFrameIndex(DAG);
3110  FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
3111  false, false, false, 0);
3112  Chain = SDValue(FPOpOut.getNode(), 1);
3113  }
3114  }
3115  return Chain;
3116 }
3117 
3118 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
3119 /// by "Src" to address "Dst" of size "Size". Alignment information is
3120 /// specified by the specific parameter attribute. The copy will be passed as
3121 /// a byval function parameter.
3122 /// Sometimes what we are copying is the end of a larger object, the part that
3123 /// does not fit in registers.
3124 static SDValue
3125 CreateCopyOfByValArgument(SDValue Dst, SDValue Src, SDValue Chain,
3126  ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
3127  SDLoc dl) {
3128  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
3129  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
3130  false, false, MachinePointerInfo(0),
3131  MachinePointerInfo(0));
3132 }
3133 
3134 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
3135 /// tail calls.
3136 static void
3137 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
3138  SDValue Arg, SDValue PtrOff, int SPDiff,
3139  unsigned ArgOffset, bool isPPC64, bool isTailCall,
3140  bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
3141  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
3142  SDLoc dl) {
3143  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3144  if (!isTailCall) {
3145  if (isVector) {
3146  SDValue StackPtr;
3147  if (isPPC64)
3148  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3149  else
3150  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3151  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
3152  DAG.getConstant(ArgOffset, PtrVT));
3153  }
3154  MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3155  MachinePointerInfo(), false, false, 0));
3156  // Calculate and remember argument location.
3157  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
3158  TailCallArguments);
3159 }
3160 
3161 static
3162 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
3163  SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
3164  SDValue LROp, SDValue FPOp, bool isDarwinABI,
3165  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
3166  MachineFunction &MF = DAG.getMachineFunction();
3167 
3168  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
3169  // might overwrite each other in case of tail call optimization.
3170  SmallVector<SDValue, 8> MemOpChains2;
3171  // Do not flag preceding copytoreg stuff together with the following stuff.
3172  InFlag = SDValue();
3173  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
3174  MemOpChains2, dl);
3175  if (!MemOpChains2.empty())
3176  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3177  &MemOpChains2[0], MemOpChains2.size());
3178 
3179  // Store the return address to the appropriate stack slot.
3180  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
3181  isPPC64, isDarwinABI, dl);
3182 
3183  // Emit callseq_end just before tailcall node.
3184  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
3185  DAG.getIntPtrConstant(0, true), InFlag, dl);
3186  InFlag = Chain.getValue(1);
3187 }
3188 
3189 static
3190 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
3191  SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
3192  SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
3193  SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
3194  const PPCSubtarget &PPCSubTarget) {
3195 
3196  bool isPPC64 = PPCSubTarget.isPPC64();
3197  bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
3198 
3199  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3200  NodeTys.push_back(MVT::Other); // Returns a chain
3201  NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
3202 
3203  unsigned CallOpc = PPCISD::CALL;
3204 
3205  bool needIndirectCall = true;
3206  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
3207  // If this is an absolute destination address, use the munged value.
3208  Callee = SDValue(Dest, 0);
3209  needIndirectCall = false;
3210  }
3211 
3212  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3213  // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
3214  // Use indirect calls for ALL function calls in JIT mode, since the
3215  // far-call stubs may be outside relocation limits for a BL instruction.
3216  if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
3217  unsigned OpFlags = 0;
3218  if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
3219  (PPCSubTarget.getTargetTriple().isMacOSX() &&
3220  PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
3221  (G->getGlobal()->isDeclaration() ||
3222  G->getGlobal()->isWeakForLinker())) {
3223  // PC-relative references to external symbols should go through $stub,
3224  // unless we're building with the leopard linker or later, which
3225  // automatically synthesizes these stubs.
3226  OpFlags = PPCII::MO_DARWIN_STUB;
3227  }
3228 
3229  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
3230  // every direct call is) turn it into a TargetGlobalAddress /
3231  // TargetExternalSymbol node so that legalize doesn't hack it.
3232  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
3233  Callee.getValueType(),
3234  0, OpFlags);
3235  needIndirectCall = false;
3236  }
3237  }
3238 
3239  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3240  unsigned char OpFlags = 0;
3241 
3242  if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
3243  (PPCSubTarget.getTargetTriple().isMacOSX() &&
3244  PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
3245  // PC-relative references to external symbols should go through $stub,
3246  // unless we're building with the leopard linker or later, which
3247  // automatically synthesizes these stubs.
3248  OpFlags = PPCII::MO_DARWIN_STUB;
3249  }
3250 
3251  Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
3252  OpFlags);
3253  needIndirectCall = false;
3254  }
3255 
3256  if (needIndirectCall) {
3257  // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
3258  // to do the call, we can't use PPCISD::CALL.
3259  SDValue MTCTROps[] = {Chain, Callee, InFlag};
3260 
3261  if (isSVR4ABI && isPPC64) {
3262  // Function pointers in the 64-bit SVR4 ABI do not point to the function
3263  // entry point, but to the function descriptor (the function entry point
3264  // address is part of the function descriptor though).
3265  // The function descriptor is a three doubleword structure with the
3266  // following fields: function entry point, TOC base address and
3267  // environment pointer.
3268  // Thus for a call through a function pointer, the following actions need
3269  // to be performed:
3270  // 1. Save the TOC of the caller in the TOC save area of its stack
3271  // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
3272  // 2. Load the address of the function entry point from the function
3273  // descriptor.
3274  // 3. Load the TOC of the callee from the function descriptor into r2.
3275  // 4. Load the environment pointer from the function descriptor into
3276  // r11.
3277  // 5. Branch to the function entry point address.
3278  // 6. On return of the callee, the TOC of the caller needs to be
3279  // restored (this is done in FinishCall()).
3280  //
3281  // All those operations are flagged together to ensure that no other
3282  // operations can be scheduled in between. E.g. without flagging the
3283  // operations together, a TOC access in the caller could be scheduled
3284  // between the load of the callee TOC and the branch to the callee, which
3285  // results in the TOC access going through the TOC of the callee instead
3286  // of going through the TOC of the caller, which leads to incorrect code.
3287 
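  // As a rough sketch, the function descriptor described above can be pictured
  // as the following layout (offsets per the 64-bit ELF ABI; the offset-16
  // load of the environment pointer appears in the code below):
  //
  //   struct FunctionDescriptor {
  //     uint64_t EntryPoint;  // offset  0: moved into CTR and branched to
  //     uint64_t TOCBase;     // offset  8: loaded into r2
  //     uint64_t EnvPointer;  // offset 16: loaded into r11
  //   };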
3288  // Load the address of the function entry point from the function
3289  // descriptor.
3290  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
3291  SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
3292  InFlag.getNode() ? 3 : 2);
3293  Chain = LoadFuncPtr.getValue(1);
3294  InFlag = LoadFuncPtr.getValue(2);
3295 
3296  // Load environment pointer into r11.
3297  // Offset of the environment pointer within the function descriptor.
3298  SDValue PtrOff = DAG.getIntPtrConstant(16);
3299 
3300  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
3301  SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
3302  InFlag);
3303  Chain = LoadEnvPtr.getValue(1);
3304  InFlag = LoadEnvPtr.getValue(2);
3305 
3306  SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
3307  InFlag);
3308  Chain = EnvVal.getValue(0);
3309  InFlag = EnvVal.getValue(1);
3310 
3311  // Load TOC of the callee into r2. We are using a target-specific load
3312  // with r2 hard coded, because the result of a target-independent load
3313  // would never go directly into r2, since r2 is a reserved register (which
3314  // prevents the register allocator from allocating it), resulting in an
3315  // additional register being allocated and an unnecessary move instruction
3316  // being generated.
3317  VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3318  SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
3319  Callee, InFlag);
3320  Chain = LoadTOCPtr.getValue(0);
3321  InFlag = LoadTOCPtr.getValue(1);
3322 
3323  MTCTROps[0] = Chain;
3324  MTCTROps[1] = LoadFuncPtr;
3325  MTCTROps[2] = InFlag;
3326  }
3327 
3328  Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
3329  2 + (InFlag.getNode() != 0));
3330  InFlag = Chain.getValue(1);
3331 
3332  NodeTys.clear();
3333  NodeTys.push_back(MVT::Other);
3334  NodeTys.push_back(MVT::Glue);
3335  Ops.push_back(Chain);
3336  CallOpc = PPCISD::BCTRL;
3337  Callee.setNode(0);
3338  // Add use of X11 (holding environment pointer)
3339  if (isSVR4ABI && isPPC64)
3340  Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
3341  // Add CTR register as callee so a bctr can be emitted later.
3342  if (isTailCall)
3343  Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
3344  }
3345 
3346  // If this is a direct call, pass the chain and the callee.
3347  if (Callee.getNode()) {
3348  Ops.push_back(Chain);
3349  Ops.push_back(Callee);
3350  }
3351  // If this is a tail call add stack pointer delta.
3352  if (isTailCall)
3353  Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
3354 
3355  // Add argument registers to the end of the list so that they are known live
3356  // into the call.
3357  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3358  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3359  RegsToPass[i].second.getValueType()));
3360 
3361  return CallOpc;
3362 }
3363 
3364 static
3365 bool isLocalCall(const SDValue &Callee)
3366 {
3367  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
3368  return !G->getGlobal()->isDeclaration() &&
3369  !G->getGlobal()->isWeakForLinker();
3370  return false;
3371 }
3372 
3373 SDValue
3374 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
3375  CallingConv::ID CallConv, bool isVarArg,
3376  const SmallVectorImpl<ISD::InputArg> &Ins,
3377  SDLoc dl, SelectionDAG &DAG,
3378  SmallVectorImpl<SDValue> &InVals) const {
3379 
3380  SmallVector<CCValAssign, 16> RVLocs;
3381  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3382  getTargetMachine(), RVLocs, *DAG.getContext());
3383  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
3384 
3385  // Copy all of the result registers out of their specified physreg.
3386  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
3387  CCValAssign &VA = RVLocs[i];
3388  assert(VA.isRegLoc() && "Can only return in registers!");
3389 
3390  SDValue Val = DAG.getCopyFromReg(Chain, dl,
3391  VA.getLocReg(), VA.getLocVT(), InFlag);
3392  Chain = Val.getValue(1);
3393  InFlag = Val.getValue(2);
3394 
3395  switch (VA.getLocInfo()) {
3396  default: llvm_unreachable("Unknown loc info!");
3397  case CCValAssign::Full: break;
3398  case CCValAssign::AExt:
3399  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3400  break;
3401  case CCValAssign::ZExt:
3402  Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
3403  DAG.getValueType(VA.getValVT()));
3404  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3405  break;
3406  case CCValAssign::SExt:
3407  Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
3408  DAG.getValueType(VA.getValVT()));
3409  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3410  break;
3411  }
3412 
3413  InVals.push_back(Val);
3414  }
3415 
3416  return Chain;
3417 }
3418 
3419 SDValue
3420 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
3421  bool isTailCall, bool isVarArg,
3422  SelectionDAG &DAG,
3423  SmallVector<std::pair<unsigned, SDValue>, 8>
3424  &RegsToPass,
3425  SDValue InFlag, SDValue Chain,
3426  SDValue &Callee,
3427  int SPDiff, unsigned NumBytes,
3428  const SmallVectorImpl<ISD::InputArg> &Ins,
3429  SmallVectorImpl<SDValue> &InVals) const {
3430  std::vector<EVT> NodeTys;
3431  SmallVector<SDValue, 8> Ops;
3432  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
3433  isTailCall, RegsToPass, Ops, NodeTys,
3434  PPCSubTarget);
3435 
3436  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
3437  if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
3438  Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
3439 
3440  // When performing tail call optimization the callee pops its arguments off
3441  // the stack. Account for this here so these bytes can be pushed back on in
3442  // PPCFrameLowering::eliminateCallFramePseudoInstr.
3443  int BytesCalleePops =
3444  (CallConv == CallingConv::Fast &&
3445  getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
3446 
3447  // Add a register mask operand representing the call-preserved registers.
3448  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
3449  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
3450  assert(Mask && "Missing call preserved mask for calling convention");
3451  Ops.push_back(DAG.getRegisterMask(Mask));
3452 
3453  if (InFlag.getNode())
3454  Ops.push_back(InFlag);
3455 
3456  // Emit tail call.
3457  if (isTailCall) {
3458  assert(((Callee.getOpcode() == ISD::Register &&
3459  cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
3460  Callee.getOpcode() == ISD::TargetExternalSymbol ||
3461  Callee.getOpcode() == ISD::TargetGlobalAddress ||
3462  isa<ConstantSDNode>(Callee)) &&
3463  "Expecting a global address, external symbol, absolute value or register");
3464 
3465  return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
3466  }
3467 
3468  // Add a NOP immediately after the branch instruction when using the 64-bit
3469  // SVR4 ABI. At link time, if caller and callee are in a different module and
3470  // thus have a different TOC, the call will be replaced with a call to a stub
3471  // function which saves the current TOC, loads the TOC of the callee and
3472  // branches to the callee. The NOP will be replaced with a load instruction
3473  // which restores the TOC of the caller from the TOC save slot of the current
3474  // stack frame. If caller and callee belong to the same module (and have the
3475  // same TOC), the NOP will remain unchanged.
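  // As an illustration (not taken from this file), a cross-module call
  //
  //   bl callee
  //   nop
  //
  // may be rewritten by the linker into a call to a TOC-saving stub followed
  // by a TOC reload from the save slot, conceptually:
  //
  //   bl <stub for callee>
  //   ld r2, 40(r1)      // 40 is the TOC save slot offset used by LowerCall_64SVR4 below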
3476 
3477  bool needsTOCRestore = false;
3478  if (!isTailCall && PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
3479  if (CallOpc == PPCISD::BCTRL) {
3480  // This is a call through a function pointer.
3481  // Restore the caller TOC from the save area into R2.
3482  // See PrepareCall() for more information about calls through function
3483  // pointers in the 64-bit SVR4 ABI.
3484  // We are using a target-specific load with r2 hard coded, because the
3485  // result of a target-independent load would never go directly into r2,
3486  // since r2 is a reserved register (which prevents the register allocator
3487  // from allocating it), resulting in an additional register being
3488  // allocated and an unnecessary move instruction being generated.
3489  needsTOCRestore = true;
3490  } else if ((CallOpc == PPCISD::CALL) &&
3491  (!isLocalCall(Callee) ||
3492  DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
3493  // Otherwise insert NOP for non-local calls.
3494  CallOpc = PPCISD::CALL_NOP;
3495  }
3496  }
3497 
3498  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
3499  InFlag = Chain.getValue(1);
3500 
3501  if (needsTOCRestore) {
3502  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3503  Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
3504  InFlag = Chain.getValue(1);
3505  }
3506 
3507  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
3508  DAG.getIntPtrConstant(BytesCalleePops, true),
3509  InFlag, dl);
3510  if (!Ins.empty())
3511  InFlag = Chain.getValue(1);
3512 
3513  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
3514  Ins, dl, DAG, InVals);
3515 }
3516 
3517 SDValue
3518 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3519  SmallVectorImpl<SDValue> &InVals) const {
3520  SelectionDAG &DAG = CLI.DAG;
3521  SDLoc &dl = CLI.DL;
3522  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3523  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3524  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3525  SDValue Chain = CLI.Chain;
3526  SDValue Callee = CLI.Callee;
3527  bool &isTailCall = CLI.IsTailCall;
3528  CallingConv::ID CallConv = CLI.CallConv;
3529  bool isVarArg = CLI.IsVarArg;
3530 
3531  if (isTailCall)
3532  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
3533  Ins, DAG);
3534 
3535  if (PPCSubTarget.isSVR4ABI()) {
3536  if (PPCSubTarget.isPPC64())
3537  return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
3538  isTailCall, Outs, OutVals, Ins,
3539  dl, DAG, InVals);
3540  else
3541  return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
3542  isTailCall, Outs, OutVals, Ins,
3543  dl, DAG, InVals);
3544  }
3545 
3546  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
3547  isTailCall, Outs, OutVals, Ins,
3548  dl, DAG, InVals);
3549 }
3550 
3551 SDValue
3552 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
3553  CallingConv::ID CallConv, bool isVarArg,
3554  bool isTailCall,
3555  const SmallVectorImpl<ISD::OutputArg> &Outs,
3556  const SmallVectorImpl<SDValue> &OutVals,
3557  const SmallVectorImpl<ISD::InputArg> &Ins,
3558  SDLoc dl, SelectionDAG &DAG,
3559  SmallVectorImpl<SDValue> &InVals) const {
3560  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
3561  // of the 32-bit SVR4 ABI stack frame layout.
3562 
3563  assert((CallConv == CallingConv::C ||
3564  CallConv == CallingConv::Fast) && "Unknown calling convention!");
3565 
3566  unsigned PtrByteSize = 4;
3567 
3568  MachineFunction &MF = DAG.getMachineFunction();
3569 
3570  // Mark this function as potentially containing a function call that may be
3571  // tail-call optimized. As a consequence, the frame pointer will be used for
3572  // dynamic stack allocation and for restoring the caller's stack pointer in
3573  // this function's epilog. This is done because the tail-called function might
3574  // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
3575  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3576  CallConv == CallingConv::Fast)
3577  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
3578 
3579  // Count how many bytes are to be pushed on the stack, including the linkage
3580  // area, parameter list area and the part of the local variable space which
3581  // contains copies of aggregates which are passed by value.
3582 
3583  // Assign locations to all of the outgoing arguments.
3584  SmallVector<CCValAssign, 16> ArgLocs;
3585  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3586  getTargetMachine(), ArgLocs, *DAG.getContext());
3587 
3588  // Reserve space for the linkage area on the stack.
3589  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
3590 
3591  if (isVarArg) {
3592  // Handle fixed and variable vector arguments differently.
3593  // Fixed vector arguments go into registers as long as registers are
3594  // available. Variable vector arguments always go into memory.
3595  unsigned NumArgs = Outs.size();
3596 
3597  for (unsigned i = 0; i != NumArgs; ++i) {
3598  MVT ArgVT = Outs[i].VT;
3599  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3600  bool Result;
3601 
3602  if (Outs[i].IsFixed) {
3603  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
3604  CCInfo);
3605  } else {
3606  Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
3607  ArgFlags, CCInfo);
3608  }
3609 
3610  if (Result) {
3611 #ifndef NDEBUG
3612  errs() << "Call operand #" << i << " has unhandled type "
3613  << EVT(ArgVT).getEVTString() << "\n";
3614 #endif
3615  llvm_unreachable(0);
3616  }
3617  }
3618  } else {
3619  // All arguments are treated the same.
3620  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
3621  }
3622 
3623  // Assign locations to all of the outgoing aggregate by value arguments.
3624  SmallVector<CCValAssign, 16> ByValArgLocs;
3625  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3626  getTargetMachine(), ByValArgLocs, *DAG.getContext());
3627 
3628  // Reserve stack space for the allocations in CCInfo.
3629  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3630 
3631  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
3632 
3633  // Size of the linkage area, parameter list area and the part of the local
3634  // variable space where copies of aggregates which are passed by value are
3635  // stored.
3636  unsigned NumBytes = CCByValInfo.getNextStackOffset();
3637 
3638  // Calculate by how many bytes the stack has to be adjusted in case of tail
3639  // call optimization.
3640  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
3641 
3642  // Adjust the stack pointer for the new arguments...
3643  // These operations are automatically eliminated by the prolog/epilog pass
3644  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
3645  dl);
3646  SDValue CallSeqStart = Chain;
3647 
3648  // Load the return address and frame pointer so they can be moved somewhere else
3649  // later.
3650  SDValue LROp, FPOp;
3651  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
3652  dl);
3653 
3654  // Set up a copy of the stack pointer for use loading and storing any
3655  // arguments that may not fit in the registers available for argument
3656  // passing.
3657  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3658 
3659  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3660  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
3661  SmallVector<SDValue, 8> MemOpChains;
3662 
3663  bool seenFloatArg = false;
3664  // Walk the register/memloc assignments, inserting copies/loads.
3665  for (unsigned i = 0, j = 0, e = ArgLocs.size();
3666  i != e;
3667  ++i) {
3668  CCValAssign &VA = ArgLocs[i];
3669  SDValue Arg = OutVals[i];
3670  ISD::ArgFlagsTy Flags = Outs[i].Flags;
3671 
3672  if (Flags.isByVal()) {
3673  // Argument is an aggregate which is passed by value, thus we need to
3674  // create a copy of it in the local variable space of the current stack
3675  // frame (which is the stack frame of the caller) and pass the address of
3676  // this copy to the callee.
3677  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
3678  CCValAssign &ByValVA = ByValArgLocs[j++];
3679  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
3680 
3681  // Memory reserved in the local variable space of the callers stack frame.
3682  unsigned LocMemOffset = ByValVA.getLocMemOffset();
3683 
3684  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
3685  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
3686 
3687  // Create a copy of the argument in the local area of the current
3688  // stack frame.
3689  SDValue MemcpyCall =
3690  CreateCopyOfByValArgument(Arg, PtrOff,
3691  CallSeqStart.getNode()->getOperand(0),
3692  Flags, DAG, dl);
3693 
3694  // This must go outside the CALLSEQ_START..END.
3695  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3696  CallSeqStart.getNode()->getOperand(1),
3697  SDLoc(MemcpyCall));
3698  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3699  NewCallSeqStart.getNode());
3700  Chain = CallSeqStart = NewCallSeqStart;
3701 
3702  // Pass the address of the aggregate copy on the stack either in a
3703  // physical register or in the parameter list area of the current stack
3704  // frame to the callee.
3705  Arg = PtrOff;
3706  }
3707 
3708  if (VA.isRegLoc()) {
3709  seenFloatArg |= VA.getLocVT().isFloatingPoint();
3710  // Put argument in a physical register.
3711  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3712  } else {
3713  // Put argument in the parameter list area of the current stack frame.
3714  assert(VA.isMemLoc());
3715  unsigned LocMemOffset = VA.getLocMemOffset();
3716 
3717  if (!isTailCall) {
3718  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
3719  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
3720 
3721  MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3722  MachinePointerInfo(),
3723  false, false, 0));
3724  } else {
3725  // Calculate and remember argument location.
3726  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
3727  TailCallArguments);
3728  }
3729  }
3730  }
3731 
3732  if (!MemOpChains.empty())
3733  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3734  &MemOpChains[0], MemOpChains.size());
3735 
3736  // Build a sequence of copy-to-reg nodes chained together with token chain
3737  // and flag operands which copy the outgoing args into the appropriate regs.
3738  SDValue InFlag;
3739  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3740  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3741  RegsToPass[i].second, InFlag);
3742  InFlag = Chain.getValue(1);
3743  }
3744 
3745  // Set CR bit 6 to true if this is a vararg call with floating args passed in
3746  // registers.
3747  if (isVarArg) {
3748  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3749  SDValue Ops[] = { Chain, InFlag };
3750 
3751  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
3752  dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
3753 
3754  InFlag = Chain.getValue(1);
3755  }
3756 
3757  if (isTailCall)
3758  PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
3759  false, TailCallArguments);
3760 
3761  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
3762  RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
3763  Ins, InVals);
3764 }
3765 
3766 // Copy an argument into memory, being careful to do this outside the
3767 // call sequence for the call to which the argument belongs.
3768 SDValue
3769 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
3770  SDValue CallSeqStart,
3771  ISD::ArgFlagsTy Flags,
3772  SelectionDAG &DAG,
3773  SDLoc dl) const {
3774  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
3775  CallSeqStart.getNode()->getOperand(0),
3776  Flags, DAG, dl);
3777  // The MEMCPY must go outside the CALLSEQ_START..END.
3778  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3779  CallSeqStart.getNode()->getOperand(1),
3780  SDLoc(MemcpyCall));
3781  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3782  NewCallSeqStart.getNode());
3783  return NewCallSeqStart;
3784 }
3785 
3786 SDValue
3787 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
3788  CallingConv::ID CallConv, bool isVarArg,
3789  bool isTailCall,
3790  const SmallVectorImpl<ISD::OutputArg> &Outs,
3791  const SmallVectorImpl<SDValue> &OutVals,
3792  const SmallVectorImpl<ISD::InputArg> &Ins,
3793  SDLoc dl, SelectionDAG &DAG,
3794  SmallVectorImpl<SDValue> &InVals) const {
3795 
3796  unsigned NumOps = Outs.size();
3797 
3798  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3799  unsigned PtrByteSize = 8;
3800 
3801  MachineFunction &MF = DAG.getMachineFunction();
3802 
3803  // Mark this function as potentially containing a function call that may be
3804  // tail-call optimized. As a consequence, the frame pointer will be used for
3805  // dynamic stack allocation and for restoring the caller's stack pointer in
3806  // this function's epilog. This is done because the tail-called function might
3807  // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
3808  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3809  CallConv == CallingConv::Fast)
3810  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
3811 
3812  unsigned nAltivecParamsAtEnd = 0;
3813 
3814  // Count how many bytes are to be pushed on the stack, including the linkage
3815  // area, and parameter passing area. We start with at least 48 bytes, which
3816  // is reserved space for [SP][CR][LR][3 x unused].
3817  // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
3818  // of this call.
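  // For reference, the 48 reserved bytes correspond to six doublewords (a
  // sketch of the 64-bit SVR4 linkage area; the TOC save slot at offset 40 is
  // the one written by the indirect-call handling later in this function):
  //
  //   SP+0   back chain        SP+24  reserved
  //   SP+8   CR save           SP+32  reserved
  //   SP+16  LR save           SP+40  TOC save slot
  //
  // The parameter save area (ArgOffset) starts at SP+48.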
3819  unsigned NumBytes =
3820  CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
3821  Outs, OutVals, nAltivecParamsAtEnd);
3822 
3823  // Calculate by how many bytes the stack has to be adjusted in case of tail
3824  // call optimization.
3825  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
3826 
3827  // To protect arguments on the stack from being clobbered in a tail call,
3828  // force all the loads to happen before doing any other lowering.
3829  if (isTailCall)
3830  Chain = DAG.getStackArgumentTokenFactor(Chain);
3831 
3832  // Adjust the stack pointer for the new arguments...
3833  // These operations are automatically eliminated by the prolog/epilog pass
3834  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
3835  dl);
3836  SDValue CallSeqStart = Chain;
3837 
3838  // Load the return address and frame pointer so they can be moved somewhere else
3839  // later.
3840  SDValue LROp, FPOp;
3841  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
3842  dl);
3843 
3844  // Set up a copy of the stack pointer for use loading and storing any
3845  // arguments that may not fit in the registers available for argument
3846  // passing.
3847  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3848 
3849  // Figure out which arguments are going to go in registers, and which in
3850  // memory. Also, if this is a vararg function, floating point operations
3851  // must be stored to our stack, and loaded into integer regs as well, if
3852  // any integer regs are available for argument passing.
3853  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
3854  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3855 
3856  static const uint16_t GPR[] = {
3857  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3858  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3859  };
3860  static const uint16_t *FPR = GetFPR();
3861 
3862  static const uint16_t VR[] = {
3863  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3864  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3865  };
3866  const unsigned NumGPRs = array_lengthof(GPR);
3867  const unsigned NumFPRs = 13;
3868  const unsigned NumVRs = array_lengthof(VR);
3869 
3870  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3871  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
3872 
3873  SmallVector<SDValue, 8> MemOpChains;
3874  for (unsigned i = 0; i != NumOps; ++i) {
3875  SDValue Arg = OutVals[i];
3876  ISD::ArgFlagsTy Flags = Outs[i].Flags;
3877 
3878  // PtrOff will be used to store the current argument to the stack if a
3879  // register cannot be found for it.
3880  SDValue PtrOff;
3881 
3882  PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
3883 
3884  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
3885 
3886  // Promote integers to 64-bit values.
3887  if (Arg.getValueType() == MVT::i32) {
3888  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
3889  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3890  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
3891  }
3892 
3893  // FIXME memcpy is used way more than necessary. Correctness first.
3894  // Note: "by value" is code for passing a structure by value, not
3895  // basic types.
3896  if (Flags.isByVal()) {
3897  // Note: Size includes alignment padding, so
3898  // struct x { short a; char b; }
3899  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
3900  // These are the proper values we need for right-justifying the
3901  // aggregate in a parameter register.
3902  unsigned Size = Flags.getByValSize();
3903 
3904  // An empty aggregate parameter takes up no storage and no
3905  // registers.
3906  if (Size == 0)
3907  continue;
3908 
3909  unsigned BVAlign = Flags.getByValAlign();
3910  if (BVAlign > 8) {
3911  if (BVAlign % PtrByteSize != 0)
3912  llvm_unreachable(
3913  "ByVal alignment is not a multiple of the pointer size");
3914 
3915  ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
3916  }
3917 
3918  // All aggregates smaller than 8 bytes must be passed right-justified.
3919  if (Size==1 || Size==2 || Size==4) {
3920  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
3921  if (GPR_idx != NumGPRs) {
3922  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
3923  MachinePointerInfo(), VT,
3924  false, false, 0);
3925  MemOpChains.push_back(Load.getValue(1));
3926  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3927 
3928  ArgOffset += PtrByteSize;
3929  continue;
3930  }
3931  }
3932 
3933  if (GPR_idx == NumGPRs && Size < 8) {
3934  SDValue Const = DAG.getConstant(PtrByteSize - Size,
3935  PtrOff.getValueType());
3936  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
3937  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
3938  CallSeqStart,
3939  Flags, DAG, dl);
3940  ArgOffset += PtrByteSize;
3941  continue;
3942  }
3943  // Copy entire object into memory. There are cases where gcc-generated
3944  // code assumes it is there, even if it could be put entirely into
3945  // registers. (This is not what the doc says.)
3946 
3947  // FIXME: The above statement is likely due to a misunderstanding of the
3948  // documents. All arguments must be copied into the parameter area BY
3949  // THE CALLEE in the event that the callee takes the address of any
3950  // formal argument. That has not yet been implemented. However, it is
3951  // reasonable to use the stack area as a staging area for the register
3952  // load.
3953 
3954  // Skip this for small aggregates, as we will use the same slot for a
3955  // right-justified copy, below.
3956  if (Size >= 8)
3957  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
3958  CallSeqStart,
3959  Flags, DAG, dl);
3960 
3961  // When a register is available, pass a small aggregate right-justified.
3962  if (Size < 8 && GPR_idx != NumGPRs) {
3963  // The easiest way to get this right-justified in a register
3964  // is to copy the structure into the rightmost portion of a
3965  // local variable slot, then load the whole slot into the
3966  // register.
3967  // FIXME: The memcpy seems to produce pretty awful code for
3968  // small aggregates, particularly for packed ones.
3969  // FIXME: It would be preferable to use the slot in the
3970  // parameter save area instead of a new local variable.
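  // (Worked example of the right-justification below: for a 3-byte aggregate,
  // Const = 8 - 3 = 5, so the bytes are copied to PtrOff+5..PtrOff+7 and the
  // full 8-byte load from PtrOff yields the value right-justified in the
  // register.)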
3971  SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
3972  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
3973  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
3974  CallSeqStart,
3975  Flags, DAG, dl);
3976 
3977  // Load the slot into the register.
3978  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
3979  MachinePointerInfo(),
3980  false, false, false, 0);
3981  MemOpChains.push_back(Load.getValue(1));
3982  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3983 
3984  // Done with this argument.
3985  ArgOffset += PtrByteSize;
3986  continue;
3987  }
3988 
3989  // For aggregates larger than PtrByteSize, copy the pieces of the
3990  // object that fit into registers from the parameter save area.
3991  for (unsigned j=0; j<Size; j+=PtrByteSize) {
3992  SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
3993  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
3994  if (GPR_idx != NumGPRs) {
3995  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
3996  MachinePointerInfo(),
3997  false, false, false, 0);
3998  MemOpChains.push_back(Load.getValue(1));
3999  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4000  ArgOffset += PtrByteSize;
4001  } else {
4002  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
4003  break;
4004  }
4005  }
4006  continue;
4007  }
4008 
4009  switch (Arg.getSimpleValueType().SimpleTy) {
4010  default: llvm_unreachable("Unexpected ValueType for argument!");
4011  case MVT::i32:
4012  case MVT::i64:
4013  if (GPR_idx != NumGPRs) {
4014  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
4015  } else {
4016  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4017  true, isTailCall, false, MemOpChains,
4018  TailCallArguments, dl);
4019  }
4020  ArgOffset += PtrByteSize;
4021  break;
4022  case MVT::f32:
4023  case MVT::f64:
4024  if (FPR_idx != NumFPRs) {
4025  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
4026 
4027  if (isVarArg) {
4028  // A single float or an aggregate containing only a single float
4029  // must be passed right-justified in the stack doubleword, and
4030  // in the GPR, if one is available.
4031  SDValue StoreOff;
4032  if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
4033  SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4034  StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4035  } else
4036  StoreOff = PtrOff;
4037 
4038  SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
4039  MachinePointerInfo(), false, false, 0);
4040  MemOpChains.push_back(Store);
4041 
4042  // Float varargs are always shadowed in available integer registers
4043  if (GPR_idx != NumGPRs) {
4044  SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4045  MachinePointerInfo(), false, false,
4046  false, 0);
4047  MemOpChains.push_back(Load.getValue(1));
4048  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4049  }
4050  } else if (GPR_idx != NumGPRs)
4051  // If we have any FPRs remaining, we may also have GPRs remaining.
4052  ++GPR_idx;
4053  } else {
4054  // Single-precision floating-point values are mapped to the
4055  // second (rightmost) word of the stack doubleword.
4056  if (Arg.getValueType() == MVT::f32) {
4057  SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4058  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4059  }
4060 
4061  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4062  true, isTailCall, false, MemOpChains,
4063  TailCallArguments, dl);
4064  }
4065  ArgOffset += 8;
4066  break;
4067  case MVT::v4f32:
4068  case MVT::v4i32:
4069  case MVT::v8i16:
4070  case MVT::v16i8:
4071  if (isVarArg) {
4072  // These go aligned on the stack, or in the corresponding R registers
4073  // when within range. The Darwin PPC ABI doc claims they also go in
4074  // V registers; in fact gcc does this only for arguments that are
4075  // prototyped, not for those that match the ... We do it for all
4076  // arguments; it seems to work.
4077  while (ArgOffset % 16 !=0) {
4078  ArgOffset += PtrByteSize;
4079  if (GPR_idx != NumGPRs)
4080  GPR_idx++;
4081  }
4082  // We could elide this store in the case where the object fits
4083  // entirely in R registers. Maybe later.
4084  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4085  DAG.getConstant(ArgOffset, PtrVT));
4086  SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4087  MachinePointerInfo(), false, false, 0);
4088  MemOpChains.push_back(Store);
4089  if (VR_idx != NumVRs) {
4090  SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4091  MachinePointerInfo(),
4092  false, false, false, 0);
4093  MemOpChains.push_back(Load.getValue(1));
4094  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
4095  }
4096  ArgOffset += 16;
4097  for (unsigned i=0; i<16; i+=PtrByteSize) {
4098  if (GPR_idx == NumGPRs)
4099  break;
4100  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4101  DAG.getConstant(i, PtrVT));
4102  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4103  false, false, false, 0);
4104  MemOpChains.push_back(Load.getValue(1));
4105  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4106  }
4107  break;
4108  }
4109 
4110  // Non-varargs Altivec params generally go in registers, but have
4111  // stack space allocated at the end.
4112  if (VR_idx != NumVRs) {
4113  // Doesn't have GPR space allocated.
4114  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
4115  } else {
4116  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4117  true, isTailCall, true, MemOpChains,
4118  TailCallArguments, dl);
4119  ArgOffset += 16;
4120  }
4121  break;
4122  }
4123  }
4124 
4125  if (!MemOpChains.empty())
4126  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4127  &MemOpChains[0], MemOpChains.size());
4128 
4129  // Check if this is an indirect call (MTCTR/BCTRL).
4130  // See PrepareCall() for more information about calls through function
4131  // pointers in the 64-bit SVR4 ABI.
4132  if (!isTailCall &&
4133  !dyn_cast<GlobalAddressSDNode>(Callee) &&
4134  !dyn_cast<ExternalSymbolSDNode>(Callee) &&
4135  !isBLACompatibleAddress(Callee, DAG)) {
4136  // Load r2 into a virtual register and store it to the TOC save area.
4137  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
4138  // TOC save area offset.
4139  SDValue PtrOff = DAG.getIntPtrConstant(40);
4140  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4141  Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
4142  false, false, 0);
4143  // R12 must contain the address of an indirect callee. This does not
4144  // mean the MTCTR instruction must use R12; it's easier to model this
4145  // as an extra parameter, so do that.
4146  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
4147  }
4148 
4149  // Build a sequence of copy-to-reg nodes chained together with token chain
4150  // and flag operands which copy the outgoing args into the appropriate regs.
4151  SDValue InFlag;
4152  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4153  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4154  RegsToPass[i].second, InFlag);
4155  InFlag = Chain.getValue(1);
4156  }
4157 
4158  if (isTailCall)
4159  PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
4160  FPOp, true, TailCallArguments);
4161 
4162  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4163  RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4164  Ins, InVals);
4165 }
4166 
4167 SDValue
4168 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
4169  CallingConv::ID CallConv, bool isVarArg,
4170  bool isTailCall,
4171  const SmallVectorImpl<ISD::OutputArg> &Outs,
4172  const SmallVectorImpl<SDValue> &OutVals,
4173  const SmallVectorImpl<ISD::InputArg> &Ins,
4174  SDLoc dl, SelectionDAG &DAG,
4175  SmallVectorImpl<SDValue> &InVals) const {
4176 
4177  unsigned NumOps = Outs.size();
4178 
4179  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4180  bool isPPC64 = PtrVT == MVT::i64;
4181  unsigned PtrByteSize = isPPC64 ? 8 : 4;
4182 
4183  MachineFunction &MF = DAG.getMachineFunction();
4184 
4185  // Mark this function as potentially containing a function call that may be
4186  // tail-call optimized. As a consequence, the frame pointer will be used for
4187  // dynamic stack allocation and for restoring the caller's stack pointer in
4188  // this function's epilog. This is done because the tail-called function might
4189  // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
4190  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4191  CallConv == CallingConv::Fast)
4192  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4193 
4194  unsigned nAltivecParamsAtEnd = 0;
4195 
4196  // Count how many bytes are to be pushed on the stack, including the linkage
4197  // area, and parameter passing area. We start with 24/48 bytes, which is
4198  // prereserved space for [SP][CR][LR][3 x unused].
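  // (Illustrative arithmetic: the linkage area holds six slots -- back chain,
  // CR, LR and three reserved words -- so it comes to 6*4 = 24 bytes for
  // 32-bit and 6*8 = 48 bytes for 64-bit.)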
4199  unsigned NumBytes =
4200  CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
4201  Outs, OutVals,
4202  nAltivecParamsAtEnd);
4203 
4204  // Calculate by how many bytes the stack has to be adjusted in case of tail
4205  // call optimization.
4206  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4207 
4208  // To protect arguments on the stack from being clobbered in a tail call,
4209  // force all the loads to happen before doing any other lowering.
4210  if (isTailCall)
4211  Chain = DAG.getStackArgumentTokenFactor(Chain);
4212 
4213  // Adjust the stack pointer for the new arguments...
4214  // These operations are automatically eliminated by the prolog/epilog pass
4215  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4216  dl);
4217  SDValue CallSeqStart = Chain;
4218 
4219  // Load the return address and frame pointer so they can be moved somewhere else
4220  // later.
4221  SDValue LROp, FPOp;
4222  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4223  dl);
4224 
4225  // Set up a copy of the stack pointer for use loading and storing any
4226  // arguments that may not fit in the registers available for argument
4227  // passing.
4228  SDValue StackPtr;
4229  if (isPPC64)
4230  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4231  else
4232  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4233 
4234  // Figure out which arguments are going to go in registers, and which in
4235  // memory. Also, if this is a vararg function, floating point operations
4236  // must be stored to our stack, and loaded into integer regs as well, if
4237  // any integer regs are available for argument passing.
4238  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
4239  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4240 
4241  static const uint16_t GPR_32[] = { // 32-bit registers.
4242  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4243  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4244  };
4245  static const uint16_t GPR_64[] = { // 64-bit registers.
4246  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4247  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4248  };
4249  static const uint16_t *FPR = GetFPR();
4250 
4251  static const uint16_t VR[] = {
4252  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4253  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4254  };
4255  const unsigned NumGPRs = array_lengthof(GPR_32);
4256  const unsigned NumFPRs = 13;
4257  const unsigned NumVRs = array_lengthof(VR);
4258 
4259  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
4260 
4261  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4262  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4263 
4264  SmallVector<SDValue, 8> MemOpChains;
4265  for (unsigned i = 0; i != NumOps; ++i) {
4266  SDValue Arg = OutVals[i];
4267  ISD::ArgFlagsTy Flags = Outs[i].Flags;
4268 
4269  // PtrOff will be used to store the current argument to the stack if a
4270  // register cannot be found for it.
4271  SDValue PtrOff;
4272 
4273  PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
4274 
4275  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4276 
4277  // On PPC64, promote integers to 64-bit values.
4278  if (isPPC64 && Arg.getValueType() == MVT::i32) {
4279  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4280  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4281  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4282  }
4283 
4284  // FIXME memcpy is used way more than necessary. Correctness first.
4285  // Note: "by value" is code for passing a structure by value, not
4286  // basic types.
4287  if (Flags.isByVal()) {
4288  unsigned Size = Flags.getByValSize();
4289  // Very small objects are passed right-justified. Everything else is
4290  // passed left-justified.
4291  if (Size==1 || Size==2) {
4292  EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
4293  if (GPR_idx != NumGPRs) {
4294  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4295  MachinePointerInfo(), VT,
4296  false, false, 0);
4297  MemOpChains.push_back(Load.getValue(1));
4298  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4299 
4300  ArgOffset += PtrByteSize;
4301  } else {
4302  SDValue Const = DAG.getConstant(PtrByteSize - Size,
4303  PtrOff.getValueType());
4304  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4305  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4306  CallSeqStart,
4307  Flags, DAG, dl);
4308  ArgOffset += PtrByteSize;
4309  }
4310  continue;
4311  }
4312  // Copy entire object into memory. There are cases where gcc-generated
4313  // code assumes it is there, even if it could be put entirely into
4314  // registers. (This is not what the doc says.)
4315  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4316  CallSeqStart,
4317  Flags, DAG, dl);
4318 
4319  // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
4320  // copy the pieces of the object that fit into registers from the
4321  // parameter save area.
4322  for (unsigned j=0; j<Size; j+=PtrByteSize) {
4323  SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
4324  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
4325  if (GPR_idx != NumGPRs) {
4326  SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
4327  MachinePointerInfo(),
4328  false, false, false, 0);
4329  MemOpChains.push_back(Load.getValue(1));
4330  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4331  ArgOffset += PtrByteSize;
4332  } else {
4333  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
4334  break;
4335  }
4336  }
4337  continue;
4338  }
4339 
4340  switch (Arg.getSimpleValueType().SimpleTy) {
4341  default: llvm_unreachable("Unexpected ValueType for argument!");
4342  case MVT::i32:
4343  case MVT::i64:
4344  if (GPR_idx != NumGPRs) {
4345  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
4346  } else {
4347  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4348  isPPC64, isTailCall, false, MemOpChains,
4349  TailCallArguments, dl);
4350  }
4351  ArgOffset += PtrByteSize;
4352  break;
4353  case MVT::f32:
4354  case MVT::f64:
4355  if (FPR_idx != NumFPRs) {
4356  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
4357 
4358  if (isVarArg) {
4359  SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4360  MachinePointerInfo(), false, false, 0);
4361  MemOpChains.push_back(Store);
4362 
4363  // Float varargs are always shadowed in available integer registers
4364  if (GPR_idx != NumGPRs) {
4365  SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4366  MachinePointerInfo(), false, false,
4367  false, 0);
4368  MemOpChains.push_back(Load.getValue(1));
4369  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4370  }
4371  if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
4372  SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4373  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4374  SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4375  MachinePointerInfo(),
4376  false, false, false, 0);
4377  MemOpChains.push_back(Load.getValue(1));
4378  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4379  }
4380  } else {
4381  // If we have any FPRs remaining, we may also have GPRs remaining.
4382  // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
4383  // GPRs.
4384  if (GPR_idx != NumGPRs)
4385  ++GPR_idx;
4386  if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
4387  !isPPC64) // PPC64 has 64-bit GPR's obviously :)
4388  ++GPR_idx;
4389  }
4390  } else
4391  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4392  isPPC64, isTailCall, false, MemOpChains,
4393  TailCallArguments, dl);
4394  if (isPPC64)
4395  ArgOffset += 8;
4396  else
4397  ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
4398  break;
4399  case MVT::v4f32:
4400  case MVT::v4i32:
4401  case MVT::v8i16:
4402  case MVT::v16i8:
4403  if (isVarArg) {
4404  // These go aligned on the stack, or in the corresponding R registers
4405  // when within range. The Darwin PPC ABI doc claims they also go in
4406  // V registers; in fact gcc does this only for arguments that are
4407  // prototyped, not for those that match the ... We do it for all
4408  // arguments; it seems to work.
4409  while (ArgOffset % 16 !=0) {
4410  ArgOffset += PtrByteSize;
4411  if (GPR_idx != NumGPRs)
4412  GPR_idx++;
4413  }
4414  // We could elide this store in the case where the object fits
4415  // entirely in R registers. Maybe later.
4416  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4417  DAG.getConstant(ArgOffset, PtrVT));
4418  SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4419  MachinePointerInfo(), false, false, 0);
4420  MemOpChains.push_back(Store);
4421  if (VR_idx != NumVRs) {
4422  SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4423  MachinePointerInfo(),
4424  false, false, false, 0);
4425  MemOpChains.push_back(Load.getValue(1));
4426  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
4427  }
4428  ArgOffset += 16;
4429  for (unsigned i=0; i<16; i+=PtrByteSize) {
4430  if (GPR_idx == NumGPRs)
4431  break;
4432  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4433  DAG.getConstant(i, PtrVT));
4434  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4435  false, false, false, 0);
4436  MemOpChains.push_back(Load.getValue(1));
4437  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4438  }
4439  break;
4440  }
4441 
4442  // Non-varargs Altivec params generally go in registers, but have
4443  // stack space allocated at the end.
4444  if (VR_idx != NumVRs) {
4445  // Doesn't have GPR space allocated.
4446  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
4447  } else if (nAltivecParamsAtEnd==0) {
4448  // We are emitting Altivec params in order.
4449  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4450  isPPC64, isTailCall, true, MemOpChains,
4451  TailCallArguments, dl);
4452  ArgOffset += 16;
4453  }
4454  break;
4455  }
4456  }
4457  // If all Altivec parameters fit in registers, as they usually do,
4458  // they get stack space following the non-Altivec parameters. We
4459  // don't track this here because nobody below needs it.
4460  // If there are more Altivec parameters than fit in registers, emit
4461  // the stores here.
4462  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
4463  unsigned j = 0;
4464  // Offset is aligned; skip 1st 12 params which go in V registers.
4465  ArgOffset = ((ArgOffset+15)/16)*16;
4466  ArgOffset += 12*16;
4467  for (unsigned i = 0; i != NumOps; ++i) {
4468  SDValue Arg = OutVals[i];
4469  EVT ArgType = Outs[i].VT;
4470  if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
4471  ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
4472  if (++j > NumVRs) {
4473  SDValue PtrOff;
4474  // We are emitting Altivec params in order.
4475  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4476  isPPC64, isTailCall, true, MemOpChains,
4477  TailCallArguments, dl);
4478  ArgOffset += 16;
4479  }
4480  }
4481  }
4482  }
4483 
4484  if (!MemOpChains.empty())
4485  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4486  &MemOpChains[0], MemOpChains.size());
4487 
4488  // On Darwin, R12 must contain the address of an indirect callee. This does
4489  // not mean the MTCTR instruction must use R12; it's easier to model this as
4490  // an extra parameter, so do that.
4491  if (!isTailCall &&
4492  !dyn_cast<GlobalAddressSDNode>(Callee) &&
4493  !dyn_cast<ExternalSymbolSDNode>(Callee) &&
4494  !isBLACompatibleAddress(Callee, DAG))
4495  RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
4496  PPC::R12), Callee));
4497 
4498  // Build a sequence of copy-to-reg nodes chained together with token chain
4499  // and flag operands which copy the outgoing args into the appropriate regs.
4500  SDValue InFlag;
4501  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4502  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4503  RegsToPass[i].second, InFlag);
4504  InFlag = Chain.getValue(1);
4505  }
4506 
4507  if (isTailCall)
4508  PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
4509  FPOp, true, TailCallArguments);
4510 
4511  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4512  RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4513  Ins, InVals);
4514 }
4515 
4516 bool
4517 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
4518  MachineFunction &MF, bool isVarArg,
4519  const SmallVectorImpl<ISD::OutputArg> &Outs,
4520  LLVMContext &Context) const {
4521  SmallVector<CCValAssign, 16> RVLocs;
4522  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
4523  RVLocs, Context);
4524  return CCInfo.CheckReturn(Outs, RetCC_PPC);
4525 }
4526 
4527 SDValue
4528 PPCTargetLowering::LowerReturn(SDValue Chain,
4529  CallingConv::ID CallConv, bool isVarArg,
4530  const SmallVectorImpl<ISD::OutputArg> &Outs,
4531  const SmallVectorImpl<SDValue> &OutVals,
4532  SDLoc dl, SelectionDAG &DAG) const {
4533 
4534  SmallVector<CCValAssign, 16> RVLocs;
4535  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4536  getTargetMachine(), RVLocs, *DAG.getContext());
4537  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
4538 
4539  SDValue Flag;
4540  SmallVector<SDValue, 4> RetOps(1, Chain);
4541 
4542  // Copy the result values into the output registers.
4543  for (unsigned i = 0; i != RVLocs.size(); ++i) {
4544  CCValAssign &VA = RVLocs[i];
4545  assert(VA.isRegLoc() && "Can only return in registers!");
4546 
4547  SDValue Arg = OutVals[i];
4548 
4549  switch (VA.getLocInfo()) {
4550  default: llvm_unreachable("Unknown loc info!");
4551  case CCValAssign::Full: break;
4552  case CCValAssign::AExt:
4553  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
4554  break;
4555  case CCValAssign::ZExt:
4556  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
4557  break;
4558  case CCValAssign::SExt:
4559  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
4560  break;
4561  }
4562 
4563  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
4564  Flag = Chain.getValue(1);
4565  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4566  }
4567 
4568  RetOps[0] = Chain; // Update chain.
4569 
4570  // Add the flag if we have it.
4571  if (Flag.getNode())
4572  RetOps.push_back(Flag);
4573 
4574  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
4575  &RetOps[0], RetOps.size());
4576 }
4577 
4578 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
4579  const PPCSubtarget &Subtarget) const {
4580  // When we pop the dynamic allocation we need to restore the SP link.
4581  SDLoc dl(Op);
4582 
4583  // Get the correct type for pointers.
4584  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4585 
4586  // Construct the stack pointer operand.
4587  bool isPPC64 = Subtarget.isPPC64();
4588  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
4589  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
4590 
4591  // Get the operands for the STACKRESTORE.
4592  SDValue Chain = Op.getOperand(0);
4593  SDValue SaveSP = Op.getOperand(1);
4594 
4595  // Load the old link SP.
4596  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
4597  MachinePointerInfo(),
4598  false, false, false, 0);
4599 
4600  // Restore the stack pointer.
4601  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
4602 
4603  // Store the old link SP.
4604  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
4605  false, false, 0);
4606 }
4607 
4608 
4609 
4610 SDValue
4611 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
4612  MachineFunction &MF = DAG.getMachineFunction();
4613  bool isPPC64 = PPCSubTarget.isPPC64();
4614  bool isDarwinABI = PPCSubTarget.isDarwinABI();
4615  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4616 
4617  // Get the current return address save index. The users of this index
4618  // are primarily the RETURNADDR lowering and tail-call lowering code.
4619  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4620  int RASI = FI->getReturnAddrSaveIndex();
4621 
4622  // If the return address save index hasn't been defined yet.
4623  if (!RASI) {
4624  // Find out the fixed offset of the return address save area.
4625  int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
4626  // Allocate the frame index for the return address save area.
4627  RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
4628  // Save the result.
4629  FI->setReturnAddrSaveIndex(RASI);
4630  }
4631  return DAG.getFrameIndex(RASI, PtrVT);
4632 }
4633 
4634 SDValue
4635 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
4636  MachineFunction &MF = DAG.getMachineFunction();
4637  bool isPPC64 = PPCSubTarget.isPPC64();
4638  bool isDarwinABI = PPCSubTarget.isDarwinABI();
4639  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4640 
4641  // Get current frame pointer save index. The users of this index will be
4642  // primarily DYNALLOC instructions.
4643  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4644  int FPSI = FI->getFramePointerSaveIndex();
4645 
4646  // If the frame pointer save index hasn't been defined yet.
4647  if (!FPSI) {
4648  // Find out the fixed offset of the frame pointer save area.
4649  int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
4650  isDarwinABI);
4651 
4652  // Allocate the frame index for frame pointer save area.
4653  FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
4654  // Save the result.
4655  FI->setFramePointerSaveIndex(FPSI);
4656  }
4657  return DAG.getFrameIndex(FPSI, PtrVT);
4658 }
4659 
4660 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4661  SelectionDAG &DAG,
4662  const PPCSubtarget &Subtarget) const {
4663  // Get the inputs.
4664  SDValue Chain = Op.getOperand(0);
4665  SDValue Size = Op.getOperand(1);
4666  SDLoc dl(Op);
4667 
4668  // Get the correct type for pointers.
4669  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4670  // Negate the size.
4671  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
4672  DAG.getConstant(0, PtrVT), Size);
4673  // Construct a node for the frame pointer save index.
4674  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
4675  // Build a DYNALLOC node.
4676  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
4677  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
4678  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
4679 }
4680 
4681 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
4682  SelectionDAG &DAG) const {
4683  SDLoc DL(Op);
4684  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
4685  DAG.getVTList(MVT::i32, MVT::Other),
4686  Op.getOperand(0), Op.getOperand(1));
4687 }
4688 
4689 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
4690  SelectionDAG &DAG) const {
4691  SDLoc DL(Op);
4692  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
4693  Op.getOperand(0), Op.getOperand(1));
4694 }
4695 
4696 /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
4697 /// when possible.
4698 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4699  // Not FP? Not a fsel.
4700  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
4701  !Op.getOperand(2).getValueType().isFloatingPoint())
4702  return Op;
4703 
4704  // We might be able to do better than this under some circumstances, but in
4705  // general, fsel-based lowering of select is a finite-math-only optimization.
4706  // For more information, see section F.3 of the 2.06 ISA specification.
4707  if (!DAG.getTarget().Options.NoInfsFPMath ||
4708  !DAG.getTarget().Options.NoNaNsFPMath)
4709  return Op;
4710 
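  // As an illustrative example of the lowering performed below (assuming
  // finite math, i.e. no Infs or NaNs):
  //
  //   (select_cc lhs, rhs, tv, fv, setge)  -->  (fsel (fsub lhs, rhs), tv, fv)
  //
  // fsel picks its second operand when its first operand is >= 0.0, so the
  // subtraction only stands in for the comparison when it cannot produce a
  // NaN or be distorted by infinities.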
4711  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4712 
4713  EVT ResVT = Op.getValueType();
4714  EVT CmpVT = Op.getOperand(0).getValueType();
4715  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4716  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
4717  SDLoc dl(Op);
4718 
4719  // If the RHS of the comparison is a 0.0, we don't need to do the
4720  // subtraction at all.
4721  SDValue Sel1;
4722  if (isFloatingPointZero(RHS))
4723  switch (CC) {
4724  default: break; // SETUO etc aren't handled by fsel.
4725  case ISD::SETNE:
4726  std::swap(TV, FV);
4727  case ISD::SETEQ:
4728  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
4729  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4730  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
4731  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
4732  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
4733  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
4734  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
4735  case ISD::SETULT:
4736  case ISD::SETLT:
4737  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
4738  case ISD::SETOGE:
4739  case ISD::SETGE:
4740  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
4741  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4742  return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
4743  case ISD::SETUGT:
4744  case ISD::SETGT:
4745  std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
4746  case ISD::SETOLE:
4747  case ISD::SETLE:
4748  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
4749  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4750  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
4751  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
4752  }
4753 
4754  SDValue Cmp;
4755  switch (CC) {
4756  default: break; // SETUO etc aren't handled by fsel.
4757  case ISD::SETNE:
4758  std::swap(TV, FV);
4759  case ISD::SETEQ:
4760  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
4761  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4762  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4763  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
4764  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
4765  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
4766  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
4767  DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
4768  case ISD::SETULT:
4769  case ISD::SETLT:
4770  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
4771  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4772  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4773  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
4774  case ISD::SETOGE:
4775  case ISD::SETGE:
4776  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
4777  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4778  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4779  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
4780  case ISD::SETUGT:
4781  case ISD::SETGT:
4782  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
4783  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4784  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4785  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
4786  case ISD::SETOLE:
4787  case ISD::SETLE:
4788  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
4789  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
4790  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4791  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
4792  }
4793  return Op;
4794 }
4795 
4796 // FIXME: Split this code up when LegalizeDAGTypes lands.
4797 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
4798  SDLoc dl) const {
4799  assert(Op.getOperand(0).getValueType().isFloatingPoint());
4800  SDValue Src = Op.getOperand(0);
4801  if (Src.getValueType() == MVT::f32)
4802  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
4803 
4804  SDValue Tmp;
4805  switch (Op.getSimpleValueType().SimpleTy) {
4806  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
4807  case MVT::i32:
4808  Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
4809  (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
4810  PPCISD::FCTIDZ),
4811  dl, MVT::f64, Src);
4812  break;
4813  case MVT::i64:
4814  assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
4815  "i64 FP_TO_UINT is supported only with FPCVT");
4816  Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
4817      PPCISD::FCTIDUZ,
4818  dl, MVT::f64, Src);
4819  break;
4820  }
4821 
4822  // Convert the FP value to an int value through memory.
4823  bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
4824  (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
4825  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
4826  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
4827  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
4828 
4829  // Emit a store to the stack slot.
4830  SDValue Chain;
4831  if (i32Stack) {
4832  MachineFunction &MF = DAG.getMachineFunction();
4833  MachineMemOperand *MMO =
4835  SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
4836  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
4837  DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
4838  MVT::i32, MMO);
4839  } else
4840  Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
4841  MPI, false, false, 0);
4842 
4843  // Result is a load from the stack slot. If loading 4 bytes, make sure to
4844  // add in a bias.
4845  if (Op.getValueType() == MVT::i32 && !i32Stack) {
4846  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
4847  DAG.getConstant(4, FIPtr.getValueType()));
4848  MPI = MachinePointerInfo();
4849  }
4850 
4851  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
4852  false, false, false, 0);
4853 }
4854 
4855 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
4856  SelectionDAG &DAG) const {
4857  SDLoc dl(Op);
4858  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
4859  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
4860  return SDValue();
4861 
4862  assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
4863  "UINT_TO_FP is supported only with FPCVT");
4864 
4865  // If we have FCFIDS, then use it when converting to single-precision.
4866  // Otherwise, convert to double-precision and then round.
4867  unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
4868  (Op.getOpcode() == ISD::UINT_TO_FP ?
4869      PPCISD::FCFIDUS : PPCISD::FCFIDS) :
4870  (Op.getOpcode() == ISD::UINT_TO_FP ?
4871      PPCISD::FCFIDU : PPCISD::FCFID);
4872  MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
4873  MVT::f32 : MVT::f64;
4874 
4875  if (Op.getOperand(0).getValueType() == MVT::i64) {
4876  SDValue SINT = Op.getOperand(0);
4877  // When converting to single-precision, we actually need to convert
4878  // to double-precision first and then round to single-precision.
4879  // To avoid double-rounding effects during that operation, we have
4880  // to prepare the input operand. Bits that might be truncated when
4881  // converting to double-precision are replaced by a bit that won't
4882  // be lost at this stage, but is below the single-precision rounding
4883  // position.
4884  //
4885  // However, if -enable-unsafe-fp-math is in effect, accept double
4886  // rounding to avoid the extra overhead.
4887  if (Op.getValueType() == MVT::f32 &&
4888  !PPCSubTarget.hasFPCVT() &&
4889  !DAG.getTarget().Options.UnsafeFPMath) {
4890 
4891  // Twiddle input to make sure the low 11 bits are zero. (If this
4892  // is the case, we are guaranteed the value will fit into the 53 bit
4893  // mantissa of an IEEE double-precision value without rounding.)
4894  // If any of those low 11 bits were not zero originally, make sure
4895  // bit 12 (value 2048) is set instead, so that the final rounding
4896  // to single-precision gets the correct result.
4897  SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
4898  SINT, DAG.getConstant(2047, MVT::i64));
4899  Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
4900  Round, DAG.getConstant(2047, MVT::i64));
4901  Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
4902  Round = DAG.getNode(ISD::AND, dl, MVT::i64,
4903  Round, DAG.getConstant(-2048, MVT::i64));
4904 
4905  // However, we cannot use that value unconditionally: if the magnitude
4906  // of the input value is small, the bit-twiddling we did above might
4907  // end up visibly changing the output. Fortunately, in that case, we
4908  // don't need to twiddle bits since the original input will convert
4909  // exactly to double-precision floating-point already. Therefore,
4910  // construct a conditional to use the original value if the top 11
4911  // bits are all sign-bit copies, and use the rounded value computed
4912  // above otherwise.
4913  SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
4914  SINT, DAG.getConstant(53, MVT::i32));
4915  Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
4916  Cond, DAG.getConstant(1, MVT::i64));
4917  Cond = DAG.getSetCC(dl, MVT::i32,
4918  Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
4919 
4920  SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
4921  }
4922 
4923  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
4924  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
4925 
4926  if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
4927  FP = DAG.getNode(ISD::FP_ROUND, dl,
4928  MVT::f32, FP, DAG.getIntPtrConstant(0));
4929  return FP;
4930  }
4931 
4932  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
4933  "Unhandled INT_TO_FP type in custom expander!");
4934  // Since we only generate this in 64-bit mode, we can take advantage of
4935  // 64-bit registers. In particular, sign extend the input value into the
4936  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
4937  // then lfd it and fcfid it.
4938  MachineFunction &MF = DAG.getMachineFunction();
4939  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
4940  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4941 
4942  SDValue Ld;
4943  if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
4944  int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
4945  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
4946 
4947  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
4949  false, false, 0);
4950 
4951  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
4952  "Expected an i32 store");
4953  MachineMemOperand *MMO =
4954  MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
4956  SDValue Ops[] = { Store, FIdx };
4957  Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
4958      PPCISD::LFIWZX : PPCISD::LFIWAX,
4959  dl, DAG.getVTList(MVT::f64, MVT::Other),
4960  Ops, 2, MVT::i32, MMO);
4961  } else {
4962  assert(PPCSubTarget.isPPC64() &&
4963  "i32->FP without LFIWAX supported only on PPC64");
4964 
4965  int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
4966  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
4967 
4968  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
4969  Op.getOperand(0));
4970 
4971  // STD the extended value into the stack slot.
4972  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
4974  false, false, 0);
4975 
4976  // Load the value as a double.
4977  Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
4979  false, false, false, 0);
4980  }
4981 
4982  // FCFID it and return it.
4983  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
4984  if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
4985  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
4986  return FP;
4987 }
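
// Editorial aside (not part of PPCISelLowering.cpp): a standalone scalar
// sketch of the sticky-bit adjustment performed above for the i64 -> f32
// path without FPCVT. The helper name is ours; it only models the Round/Cond
// computation and assumes arithmetic '>>' on signed integers.
#include <cstdint>

int64_t prepare_i64_for_f32(int64_t x) {
  int64_t round = (x & 2047) + 2047;         // carries into the 0x800 bit iff any low bit is set
  round = (round | x) & ~INT64_C(2047);      // keep the high bits of x, clear the low 11
  uint64_t cond = (uint64_t)(x >> 53) + 1;   // > 1 iff the top 11 bits are not all sign copies
  return cond > 1 ? round : x;               // small values already convert exactly; leave them alone
}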
4988 
4989 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4990  SelectionDAG &DAG) const {
4991  SDLoc dl(Op);
4992  /*
4993  The rounding mode is in bits 30:31 of FPSCR, and has the following
4994  settings:
4995  00 Round to nearest
4996  01 Round to 0
4997  10 Round to +inf
4998  11 Round to -inf
4999 
5000  FLT_ROUNDS, on the other hand, expects the following:
5001  -1 Undefined
5002  0 Round to 0
5003  1 Round to nearest
5004  2 Round to +inf
5005  3 Round to -inf
5006 
5007  To perform the conversion, we do:
5008  ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
5009  */
5010 
5011  MachineFunction &MF = DAG.getMachineFunction();
5012  EVT VT = Op.getValueType();
5013  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5014  SDValue MFFSreg, InFlag;
5015 
5016  // Save FP Control Word to register
5017  EVT NodeTys[] = {
5018  MVT::f64, // return register
5019  MVT::Glue // unused in this context
5020  };
5021  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
5022 
5023  // Save FP register to stack slot
5024  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
5025  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
5026  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
5027  StackSlot, MachinePointerInfo(), false, false,0);
5028 
5029  // Load FP Control Word from low 32 bits of stack slot.
5030  SDValue Four = DAG.getConstant(4, PtrVT);
5031  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
5032  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
5033  false, false, false, 0);
5034 
5035  // Transform as necessary
5036  SDValue CWD1 =
5037  DAG.getNode(ISD::AND, dl, MVT::i32,
5038  CWD, DAG.getConstant(3, MVT::i32));
5039  SDValue CWD2 =
5040  DAG.getNode(ISD::SRL, dl, MVT::i32,
5041  DAG.getNode(ISD::AND, dl, MVT::i32,
5042  DAG.getNode(ISD::XOR, dl, MVT::i32,
5043  CWD, DAG.getConstant(3, MVT::i32)),
5044  DAG.getConstant(3, MVT::i32)),
5045  DAG.getConstant(1, MVT::i32));
5046 
5047  SDValue RetVal =
5048  DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
5049 
5050  return DAG.getNode((VT.getSizeInBits() < 16 ?
5051  ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
5052 }
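
// Editorial aside (not part of PPCISelLowering.cpp): a tiny standalone check
// of the FPSCR -> FLT_ROUNDS formula quoted in the comment above; the
// function name is ours.
#include <cassert>

static int fltRoundsFromFPSCR(unsigned rn) {   // rn = FPSCR rounding-control bits 30:31
  return (rn & 0x3) ^ ((~rn & 0x3) >> 1);
}

int main() {
  assert(fltRoundsFromFPSCR(0) == 1);   // round to nearest
  assert(fltRoundsFromFPSCR(1) == 0);   // round toward zero
  assert(fltRoundsFromFPSCR(2) == 2);   // round toward +infinity
  assert(fltRoundsFromFPSCR(3) == 3);   // round toward -infinity
  return 0;
}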
5053 
5054 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
5055  EVT VT = Op.getValueType();
5056  unsigned BitWidth = VT.getSizeInBits();
5057  SDLoc dl(Op);
5058  assert(Op.getNumOperands() == 3 &&
5059  VT == Op.getOperand(1).getValueType() &&
5060  "Unexpected SHL!");
5061 
5062  // Expand into a bunch of logical ops. Note that these ops
5063  // depend on the PPC behavior for oversized shift amounts.
5064  SDValue Lo = Op.getOperand(0);
5065  SDValue Hi = Op.getOperand(1);
5066  SDValue Amt = Op.getOperand(2);
5067  EVT AmtVT = Amt.getValueType();
5068 
5069  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5070  DAG.getConstant(BitWidth, AmtVT), Amt);
5071  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
5072  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
5073  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
5074  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5075  DAG.getConstant(-BitWidth, AmtVT));
5076  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
5077  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
5078  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
5079  SDValue OutOps[] = { OutLo, OutHi };
5080  return DAG.getMergeValues(OutOps, 2, dl);
5081 }
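
// Editorial aside (not part of PPCISelLowering.cpp): a scalar model of the
// SHL_PARTS expansion above for 32-bit halves. It relies on the PPC slw/srw
// convention that a 6-bit shift amount of 32..63 produces zero, which is why
// the sequence is correct for any shift amount; names are ours.
#include <cstdint>

static uint32_t ppc_shl(uint32_t x, uint32_t amt) { return (amt & 63) >= 32 ? 0 : x << (amt & 31); }
static uint32_t ppc_srl(uint32_t x, uint32_t amt) { return (amt & 63) >= 32 ? 0 : x >> (amt & 31); }

// (hi:lo) << amt, mirroring Tmp1..Tmp6 in LowerSHL_PARTS.
static void shl_parts(uint32_t lo, uint32_t hi, uint32_t amt,
                      uint32_t &outLo, uint32_t &outHi) {
  uint32_t tmp1 = 32 - amt;                              // BitWidth - Amt
  uint32_t tmp4 = ppc_shl(hi, amt) | ppc_srl(lo, tmp1);  // covers amt < 32
  uint32_t tmp6 = ppc_shl(lo, amt - 32);                 // covers amt >= 32
  outHi = tmp4 | tmp6;
  outLo = ppc_shl(lo, amt);
}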
5082 
5083 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
5084  EVT VT = Op.getValueType();
5085  SDLoc dl(Op);
5086  unsigned BitWidth = VT.getSizeInBits();
5087  assert(Op.getNumOperands() == 3 &&
5088  VT == Op.getOperand(1).getValueType() &&
5089  "Unexpected SRL!");
5090 
5091  // Expand into a bunch of logical ops. Note that these ops
5092  // depend on the PPC behavior for oversized shift amounts.
5093  SDValue Lo = Op.getOperand(0);
5094  SDValue Hi = Op.getOperand(1);
5095  SDValue Amt = Op.getOperand(2);
5096  EVT AmtVT = Amt.getValueType();
5097 
5098  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5099  DAG.getConstant(BitWidth, AmtVT), Amt);
5100  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
5101  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
5102  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
5103  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5104  DAG.getConstant(-BitWidth, AmtVT));
5105  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
5106  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
5107  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
5108  SDValue OutOps[] = { OutLo, OutHi };
5109  return DAG.getMergeValues(OutOps, 2, dl);
5110 }
5111 
5112 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
5113  SDLoc dl(Op);
5114  EVT VT = Op.getValueType();
5115  unsigned BitWidth = VT.getSizeInBits();
5116  assert(Op.getNumOperands() == 3 &&
5117  VT == Op.getOperand(1).getValueType() &&
5118  "Unexpected SRA!");
5119 
5120  // Expand into a bunch of logical ops, followed by a select_cc.
5121  SDValue Lo = Op.getOperand(0);
5122  SDValue Hi = Op.getOperand(1);
5123  SDValue Amt = Op.getOperand(2);
5124  EVT AmtVT = Amt.getValueType();
5125 
5126  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5127  DAG.getConstant(BitWidth, AmtVT), Amt);
5128  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
5129  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
5130  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
5131  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5132  DAG.getConstant(-BitWidth, AmtVT));
5133  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
5134  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
5135  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
5136  Tmp4, Tmp6, ISD::SETLE);
5137  SDValue OutOps[] = { OutLo, OutHi };
5138  return DAG.getMergeValues(OutOps, 2, dl);
5139 }
5140 
5141 //===----------------------------------------------------------------------===//
5142 // Vector related lowering.
5143 //
5144 
5145 /// BuildSplatI - Build a canonical splati of Val with an element size of
5146 /// SplatSize. Cast the result to VT.
5147 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
5148  SelectionDAG &DAG, SDLoc dl) {
5149  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
5150 
5151  static const EVT VTys[] = { // canonical VT to use for each size.
5152     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
5153  };
5154 
5155  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
5156 
5157  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
5158  if (Val == -1)
5159  SplatSize = 1;
5160 
5161  EVT CanonicalVT = VTys[SplatSize-1];
5162 
5163  // Build a canonical splat for this value.
5164  SDValue Elt = DAG.getConstant(Val, MVT::i32);
5166  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
5167  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
5168  &Ops[0], Ops.size());
5169  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
5170 }
5171 
5172 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
5173 /// specified intrinsic ID.
5174 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
5175  SelectionDAG &DAG, SDLoc dl,
5176  EVT DestVT = MVT::Other) {
5177  if (DestVT == MVT::Other) DestVT = Op.getValueType();
5178  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5179  DAG.getConstant(IID, MVT::i32), Op);
5180 }
5181 
5182 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
5183 /// specified intrinsic ID.
5184 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
5185  SelectionDAG &DAG, SDLoc dl,
5186  EVT DestVT = MVT::Other) {
5187  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
5188  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5189  DAG.getConstant(IID, MVT::i32), LHS, RHS);
5190 }
5191 
5192 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
5193 /// specified intrinsic ID.
5194 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
5195  SDValue Op2, SelectionDAG &DAG,
5196  SDLoc dl, EVT DestVT = MVT::Other) {
5197  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
5198  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5199  DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
5200 }
5201 
5202 
5203 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
5204 /// amount. The result has the specified value type.
5205 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
5206  EVT VT, SelectionDAG &DAG, SDLoc dl) {
5207  // Force LHS/RHS to be the right type.
5208  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
5209  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
5210 
5211  int Ops[16];
5212  for (unsigned i = 0; i != 16; ++i)
5213  Ops[i] = i + Amt;
5214  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
5215  return DAG.getNode(ISD::BITCAST, dl, VT, T);
5216 }
5217 
5218 // If this is a case we can't handle, return null and let the default
5219 // expansion code take care of it. If we CAN select this case, and if it
5220 // selects to a single instruction, return Op. Otherwise, if we can codegen
5221 // this case more efficiently than a constant pool load, lower it to the
5222 // sequence of ops that should be used.
5223 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
5224  SelectionDAG &DAG) const {
5225  SDLoc dl(Op);
5226  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
5227  assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
5228 
5229  // Check if this is a splat of a constant value.
5230  APInt APSplatBits, APSplatUndef;
5231  unsigned SplatBitSize;
5232  bool HasAnyUndefs;
5233  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
5234  HasAnyUndefs, 0, true) || SplatBitSize > 32)
5235  return SDValue();
5236 
5237  unsigned SplatBits = APSplatBits.getZExtValue();
5238  unsigned SplatUndef = APSplatUndef.getZExtValue();
5239  unsigned SplatSize = SplatBitSize / 8;
5240 
5241  // First, handle single instruction cases.
5242 
5243  // All zeros?
5244  if (SplatBits == 0) {
5245  // Canonicalize all zero vectors to be v4i32.
5246  if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
5247  SDValue Z = DAG.getConstant(0, MVT::i32);
5248  Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
5249  Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
5250  }
5251  return Op;
5252  }
5253 
5254  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
5255  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
5256  (32-SplatBitSize));
5257  if (SextVal >= -16 && SextVal <= 15)
5258  return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
5259 
5260 
5261  // Two instruction sequences.
5262 
5263  // If this value is in the range [-32,30] and is even, use:
5264  // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
5265  // If this value is in the range [17,31] and is odd, use:
5266  // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
5267  // If this value is in the range [-31,-17] and is odd, use:
5268  // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
5269  // Note the last two are three-instruction sequences.
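  // (Editorial examples, not in the original source: val = 24 lowers as
  //  vsplti(12) + vsplti(12); val = 27 as vsplti(11) - vsplti(-16), since
  //  11 - (-16) = 27; val = -27 as vsplti(-11) + vsplti(-16), since
  //  -11 + (-16) = -27.)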
5270  if (SextVal >= -32 && SextVal <= 31) {
5271  // To avoid having these optimizations undone by constant folding,
5272  // we convert to a pseudo that will be expanded later into one of
5273  // the above forms.
5274  SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
5275  EVT VT = Op.getValueType();
5276  int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
5277  SDValue EltSize = DAG.getConstant(Size, MVT::i32);
5278  return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
5279  }
5280 
5281  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
5282  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
5283  // for fneg/fabs.
5284  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
5285  // Make -1 and vspltisw -1:
5286  SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
5287 
5288  // Make the VSLW intrinsic, computing 0x8000_0000.
5289  SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
5290  OnesV, DAG, dl);
5291 
5292  // xor by OnesV to invert it.
5293  Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
5294  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5295  }
5296 
5297  // Check to see if this is a wide variety of vsplti*, binop self cases.
5298  static const signed char SplatCsts[] = {
5299  -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
5300  -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
5301  };
5302 
5303  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
5304  // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
5305  // cases which are ambiguous (e.g. formation of 0x8000_0000).
5306  int i = SplatCsts[idx];
5307 
5308  // Figure out what shift amount will be used by altivec if shifted by i in
5309  // this splat size.
5310  unsigned TypeShiftAmt = i & (SplatBitSize-1);
5311 
5312  // vsplti + shl self.
5313  if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
5314  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5315  static const unsigned IIDs[] = { // Intrinsic to use for each size.
5318  };
5319  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5320  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5321  }
5322 
5323  // vsplti + srl self.
5324  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
5325  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5326  static const unsigned IIDs[] = { // Intrinsic to use for each size.
5329  };
5330  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5331  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5332  }
5333 
5334  // vsplti + sra self.
5335  if (SextVal == (i >> TypeShiftAmt)) { // arithmetic shift, unlike the srl case above
5336  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5337  static const unsigned IIDs[] = { // Intrinsic to use for each size.
5340  };
5341  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5342  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5343  }
5344 
5345  // vsplti + rol self.
5346  if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
5347  ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
5348  SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5349  static const unsigned IIDs[] = { // Intrinsic to use for each size.
5352  };
5353  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5354  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5355  }
5356 
5357  // t = vsplti c, result = vsldoi t, t, 1
5358  if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
5359  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5360  return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
5361  }
5362  // t = vsplti c, result = vsldoi t, t, 2
5363  if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
5364  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5365  return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
5366  }
5367  // t = vsplti c, result = vsldoi t, t, 3
5368  if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
5369  SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5370  return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
5371  }
5372  }
5373 
5374  return SDValue();
5375 }
5376 
5377 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5378 /// the specified operations to build the shuffle.
5379 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5380  SDValue RHS, SelectionDAG &DAG,
5381  SDLoc dl) {
5382  unsigned OpNum = (PFEntry >> 26) & 0x0F;
5383  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
5384  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
5385 
5386  enum {
5387  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5388  OP_VMRGHW,
5389  OP_VMRGLW,
5390  OP_VSPLTISW0,
5391  OP_VSPLTISW1,
5392  OP_VSPLTISW2,
5393  OP_VSPLTISW3,
5394  OP_VSLDOI4,
5395  OP_VSLDOI8,
5396  OP_VSLDOI12
5397  };
5398 
5399  if (OpNum == OP_COPY) {
5400  if (LHSID == (1*9+2)*9+3) return LHS;
5401  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
5402  return RHS;
5403  }
5404 
5405  SDValue OpLHS, OpRHS;
5406  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5407  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5408 
5409  int ShufIdxs[16];
5410  switch (OpNum) {
5411  default: llvm_unreachable("Unknown i32 permute!");
5412  case OP_VMRGHW:
5413  ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
5414  ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
5415  ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
5416  ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
5417  break;
5418  case OP_VMRGLW:
5419  ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
5420  ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
5421  ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
5422  ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
5423  break;
5424  case OP_VSPLTISW0:
5425  for (unsigned i = 0; i != 16; ++i)
5426  ShufIdxs[i] = (i&3)+0;
5427  break;
5428  case OP_VSPLTISW1:
5429  for (unsigned i = 0; i != 16; ++i)
5430  ShufIdxs[i] = (i&3)+4;
5431  break;
5432  case OP_VSPLTISW2:
5433  for (unsigned i = 0; i != 16; ++i)
5434  ShufIdxs[i] = (i&3)+8;
5435  break;
5436  case OP_VSPLTISW3:
5437  for (unsigned i = 0; i != 16; ++i)
5438  ShufIdxs[i] = (i&3)+12;
5439  break;
5440  case OP_VSLDOI4:
5441  return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
5442  case OP_VSLDOI8:
5443  return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
5444  case OP_VSLDOI12:
5445  return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
5446  }
5447  EVT VT = OpLHS.getValueType();
5448  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
5449  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
5450  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
5451  return DAG.getNode(ISD::BITCAST, dl, VT, T);
5452 }
5453 
5454 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
5455 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
5456 /// return the code it can be lowered into. Worst case, it can always be
5457 /// lowered into a vperm.
5458 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
5459  SelectionDAG &DAG) const {
5460  SDLoc dl(Op);
5461  SDValue V1 = Op.getOperand(0);
5462  SDValue V2 = Op.getOperand(1);
5463  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
5464  EVT VT = Op.getValueType();
5465 
5466  // Cases that are handled by instructions that take permute immediates
5467  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
5468  // selected by the instruction selector.
5469  if (V2.getOpcode() == ISD::UNDEF) {
5470  if (PPC::isSplatShuffleMask(SVOp, 1) ||
5471  PPC::isSplatShuffleMask(SVOp, 2) ||
5472  PPC::isSplatShuffleMask(SVOp, 4) ||
5473  PPC::isVPKUWUMShuffleMask(SVOp, true) ||
5474  PPC::isVPKUHUMShuffleMask(SVOp, true) ||
5475  PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
5476  PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
5477  PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
5478  PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
5479  PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
5480  PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
5481  PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
5482  return Op;
5483  }
5484  }
5485 
5486  // Altivec has a variety of "shuffle immediates" that take two vector inputs
5487  // and produce a fixed permutation. If any of these match, do not lower to
5488  // VPERM.
5489  if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
5490  PPC::isVPKUHUMShuffleMask(SVOp, false) ||
5491  PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
5492  PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
5493  PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
5494  PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
5495  PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
5496  PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
5497  PPC::isVMRGHShuffleMask(SVOp, 4, false))
5498  return Op;
5499 
5500  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
5501  // perfect shuffle table to emit an optimal matching sequence.
5502  ArrayRef<int> PermMask = SVOp->getMask();
5503 
5504  unsigned PFIndexes[4];
5505  bool isFourElementShuffle = true;
5506  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
5507  unsigned EltNo = 8; // Start out undef.
5508  for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
5509  if (PermMask[i*4+j] < 0)
5510  continue; // Undef, ignore it.
5511 
5512  unsigned ByteSource = PermMask[i*4+j];
5513  if ((ByteSource & 3) != j) {
5514  isFourElementShuffle = false;
5515  break;
5516  }
5517 
5518  if (EltNo == 8) {
5519  EltNo = ByteSource/4;
5520  } else if (EltNo != ByteSource/4) {
5521  isFourElementShuffle = false;
5522  break;
5523  }
5524  }
5525  PFIndexes[i] = EltNo;
5526  }
5527 
5528  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
5529  // perfect shuffle vector to determine if it is cost effective to do this as
5530  // discrete instructions, or whether we should use a vperm.
5531  if (isFourElementShuffle) {
5532  // Compute the index in the perfect shuffle table.
5533  unsigned PFTableIndex =
5534  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
5535 
5536  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5537  unsigned Cost = (PFEntry >> 30);
5538 
5539  // Determining when to avoid vperm is tricky. Many things affect the cost
5540  // of vperm, particularly how many times the perm mask needs to be computed.
5541  // For example, if the perm mask can be hoisted out of a loop or is already
5542  // used (perhaps because there are multiple permutes with the same shuffle
5543  // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
5544  // the loop requires an extra register.
5545  //
5546  // As a compromise, we only emit discrete instructions if the shuffle can be
5547  // generated in 3 or fewer operations. When we have loop information
5548  // available, if this block is within a loop, we should avoid using vperm
5549  // for 3-operation perms and use a constant pool load instead.
5550  if (Cost < 3)
5551  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
5552  }
5553 
5554  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
5555  // vector that will get spilled to the constant pool.
5556  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
5557 
5558  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
5559  // that it is in input element units, not in bytes. Convert now.
5560  EVT EltVT = V1.getValueType().getVectorElementType();
5561  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
5562 
5563  SmallVector<SDValue, 16> ResultMask;
5564  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
5565  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
5566 
5567  for (unsigned j = 0; j != BytesPerElement; ++j)
5568  ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
5569  MVT::i32));
5570  }
5571 
5572  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
5573  &ResultMask[0], ResultMask.size());
5574  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
5575 }
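
// Editorial aside (not part of PPCISelLowering.cpp): the perfect-shuffle table
// index computed above encodes the four 32-bit result elements as base-9
// digits, where 0..7 select a source element and 8 means undef; the helper
// name is ours.
static unsigned perfectShuffleIndex(const unsigned idx[4]) {
  // Same as PFIndexes[0]*9*9*9 + PFIndexes[1]*9*9 + PFIndexes[2]*9 + PFIndexes[3].
  return ((idx[0] * 9 + idx[1]) * 9 + idx[2]) * 9 + idx[3];
}
// The identity shuffle <0,1,2,3> gets index (1*9+2)*9+3, which is exactly the
// OP_COPY check in GeneratePerfectShuffle.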
5576 
5577 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
5578 /// altivec comparison. If it is, return true and fill in CompareOpc/isDot with
5579 /// information about the intrinsic.
5580 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
5581  bool &isDot) {
5582  unsigned IntrinsicID =
5583  cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
5584  CompareOpc = -1;
5585  isDot = false;
5586  switch (IntrinsicID) {
5587  default: return false;
5588  // Comparison predicates.
5589  case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
5590  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
5591  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
5592  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
5593  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
5594  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
5595  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
5596  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
5597  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
5598  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
5599  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
5600  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
5601  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
5602 
5603  // Normal Comparisons.
5604  case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
5605  case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
5606  case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
5607  case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
5608  case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
5609  case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
5610  case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
5611  case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
5612  case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
5613  case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
5614  case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
5615  case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
5616  case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
5617  }
5618  return true;
5619 }
5620 
5621 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
5622 /// lower, do it, otherwise return null.
5623 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
5624  SelectionDAG &DAG) const {
5625  // If this is a lowered altivec predicate compare, CompareOpc is set to the
5626  // opcode number of the comparison.
5627  SDLoc dl(Op);
5628  int CompareOpc;
5629  bool isDot;
5630  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
5631  return SDValue(); // Don't custom lower most intrinsics.
5632 
5633  // If this is a non-dot comparison, make the VCMP node and we are done.
5634  if (!isDot) {
5635  SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
5636  Op.getOperand(1), Op.getOperand(2),
5637  DAG.getConstant(CompareOpc, MVT::i32));
5638  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
5639  }
5640 
5641  // Create the PPCISD altivec 'dot' comparison node.
5642  SDValue Ops[] = {
5643  Op.getOperand(2), // LHS
5644  Op.getOperand(3), // RHS
5645  DAG.getConstant(CompareOpc, MVT::i32)
5646  };
5647  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
5648  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
5649 
5650  // Now that we have the comparison, emit a copy from the CR to a GPR.
5651  // This is flagged to the above dot comparison.
5652  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
5653  DAG.getRegister(PPC::CR6, MVT::i32),
5654  CompNode.getValue(1));
5655 
5656  // Unpack the result based on how the target uses it.
5657  unsigned BitNo; // Bit # of CR6.
5658  bool InvertBit; // Invert result?
5659  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
5660  default: // Can't happen, don't crash on invalid number though.
5661  case 0: // Return the value of the EQ bit of CR6.
5662  BitNo = 0; InvertBit = false;
5663  break;
5664  case 1: // Return the inverted value of the EQ bit of CR6.
5665  BitNo = 0; InvertBit = true;
5666  break;
5667  case 2: // Return the value of the LT bit of CR6.
5668  BitNo = 2; InvertBit = false;
5669  break;
5670  case 3: // Return the inverted value of the LT bit of CR6.
5671  BitNo = 2; InvertBit = true;
5672  break;
5673  }
5674 
5675  // Shift the bit into the low position.
5676  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
5677  DAG.getConstant(8-(3-BitNo), MVT::i32));
5678  // Isolate the bit.
5679  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
5680  DAG.getConstant(1, MVT::i32));
5681 
5682  // If we are supposed to, toggle the bit.
5683  if (InvertBit)
5684  Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
5685  DAG.getConstant(1, MVT::i32));
5686  return Flags;
5687 }
5688 
5689 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
5690  SelectionDAG &DAG) const {
5691  SDLoc dl(Op);
5692  // Create a stack slot that is 16-byte aligned.
5693  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
5694  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
5695  EVT PtrVT = getPointerTy();
5696  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5697 
5698  // Store the input value into Value#0 of the stack slot.
5699  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
5700  Op.getOperand(0), FIdx, MachinePointerInfo(),
5701  false, false, 0);
5702  // Load it out.
5703  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
5704  false, false, false, 0);
5705 }
5706 
5707 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
5708  SDLoc dl(Op);
5709  if (Op.getValueType() == MVT::v4i32) {
5710  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5711 
5712  SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
5713  SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
5714 
5715  SDValue RHSSwap = // = vrlw RHS, 16
5716  BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
5717 
5718  // Shrinkify inputs to v8i16.
5719  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
5720  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
5721  RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
5722 
5723  // Low parts multiplied together, generating 32-bit results (we ignore the
5724  // top parts).
5726  LHS, RHS, DAG, dl, MVT::v4i32);
5727 
5729  LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
5730  // Shift the high parts up 16 bits.
5732  Neg16, DAG, dl);
5733  return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
5734  } else if (Op.getValueType() == MVT::v8i16) {
5735  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5736 
5737  SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
5738 
5740  LHS, RHS, Zero, DAG, dl);
5741  } else if (Op.getValueType() == MVT::v16i8) {
5742  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5743 
5744  // Multiply the even 8-bit parts, producing 16-bit sums.
5746  LHS, RHS, DAG, dl, MVT::v8i16);
5747  EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
5748 
5749  // Multiply the odd 8-bit parts, producing 16-bit sums.
5751  LHS, RHS, DAG, dl, MVT::v8i16);
5752  OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
5753 
5754  // Merge the results together.
5755  int Ops[16];
5756  for (unsigned i = 0; i != 8; ++i) {
5757  Ops[i*2 ] = 2*i+1;
5758  Ops[i*2+1] = 2*i+1+16;
5759  }
5760  return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
5761  } else {
5762  llvm_unreachable("Unknown mul to lower!");
5763  }
5764 }
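
// Editorial aside (not part of PPCISelLowering.cpp): a scalar model of the
// v4i32 MUL lowering above. Per 32-bit element, the Altivec sequence computes
// the product from 16-bit halves: low halves multiplied directly, cross
// products summed and shifted up by 16; the a_hi*b_hi term falls outside the
// low 32 bits and is ignored. The helper name is ours.
#include <cstdint>

static uint32_t mul32_from_halves(uint32_t a, uint32_t b) {
  uint32_t loProd = (a & 0xFFFF) * (b & 0xFFFF);
  uint32_t hiProd = (a & 0xFFFF) * (b >> 16) + (a >> 16) * (b & 0xFFFF);
  return loProd + (hiProd << 16);   // equal to a * b modulo 2^32
}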
5765 
5766 /// LowerOperation - Provide custom lowering hooks for some operations.
5767 ///
5768 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
5769  switch (Op.getOpcode()) {
5770  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
5771  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
5772  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
5773  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
5774  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
5775  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
5776  case ISD::SETCC: return LowerSETCC(Op, DAG);
5777  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
5778  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
5779  case ISD::VASTART:
5780  return LowerVASTART(Op, DAG, PPCSubTarget);
5781 
5782  case ISD::VAARG:
5783  return LowerVAARG(Op, DAG, PPCSubTarget);
5784 
5785  case ISD::VACOPY:
5786  return LowerVACOPY(Op, DAG, PPCSubTarget);
5787 
5788  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
5789  case ISD::DYNAMIC_STACKALLOC:
5790  return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
5791 
5792  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
5793  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
5794 
5795  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
5796  case ISD::FP_TO_UINT:
5797  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
5798  SDLoc(Op));
5799  case ISD::UINT_TO_FP:
5800  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
5801  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
5802 
5803  // Lower 64-bit shifts.
5804  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
5805  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
5806  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
5807 
5808  // Vector-related lowering.
5809  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
5810  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
5811  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5812  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
5813  case ISD::MUL: return LowerMUL(Op, DAG);
5814 
5815  // For counter-based loop handling.
5816  case ISD::INTRINSIC_W_CHAIN: return SDValue();
5817 
5818  // Frame & Return address.
5819  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
5820  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
5821  }
5822 }
5823 
5824 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
5825  SmallVectorImpl<SDValue>&Results,
5826  SelectionDAG &DAG) const {
5827  const TargetMachine &TM = getTargetMachine();
5828  SDLoc dl(N);
5829  switch (N->getOpcode()) {
5830  default:
5831  llvm_unreachable("Do not know how to custom type legalize this operation!");
5832  case ISD::INTRINSIC_W_CHAIN: {
5833  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
5834          Intrinsic::ppc_is_decremented_ctr_nonzero)
5835  break;
5836 
5837  assert(N->getValueType(0) == MVT::i1 &&
5838  "Unexpected result type for CTR decrement intrinsic");
5839  EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
5840  SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
5841  SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
5842  N->getOperand(1));
5843 
5844  Results.push_back(NewInt);
5845  Results.push_back(NewInt.getValue(1));
5846  break;
5847  }
5848  case ISD::VAARG: {
5849  if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
5850  || TM.getSubtarget<PPCSubtarget>().isPPC64())
5851  return;
5852 
5853  EVT VT = N->getValueType(0);
5854 
5855  if (VT == MVT::i64) {
5856  SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
5857 
5858  Results.push_back(NewNode);
5859  Results.push_back(NewNode.getValue(1));
5860  }
5861  return;
5862  }
5863  case ISD::FP_ROUND_INREG: {
5864  assert(N->getValueType(0) == MVT::ppcf128);
5865  assert(N->getOperand(0).getValueType() == MVT::ppcf128);
5866  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
5867  MVT::f64, N->getOperand(0),
5868  DAG.getIntPtrConstant(0));
5869  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
5870  MVT::f64, N->getOperand(0),
5871  DAG.getIntPtrConstant(1));
5872 
5873  // Add the two halves of the long double in round-to-zero mode.
5874  SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
5875 
5876  // We know the low half is about to be thrown away, so just use something
5877  // convenient.
5878  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
5879  FPreg, FPreg));
5880  return;
5881  }
5882  case ISD::FP_TO_SINT:
5883  // LowerFP_TO_INT() can only handle f32 and f64.
5884  if (N->getOperand(0).getValueType() == MVT::ppcf128)
5885  return;
5886  Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
5887  return;
5888  }
5889 }
5890 
5891 
5892 //===----------------------------------------------------------------------===//
5893 // Other Lowering Code
5894 //===----------------------------------------------------------------------===//
5895 
5896 MachineBasicBlock *
5897 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
5898  bool is64bit, unsigned BinOpcode) const {
5899  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
5900  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5901 
5902  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5903  MachineFunction *F = BB->getParent();
5904  MachineFunction::iterator It = BB;
5905  ++It;
5906 
5907  unsigned dest = MI->getOperand(0).getReg();
5908  unsigned ptrA = MI->getOperand(1).getReg();
5909  unsigned ptrB = MI->getOperand(2).getReg();
5910  unsigned incr = MI->getOperand(3).getReg();
5911  DebugLoc dl = MI->getDebugLoc();
5912 
5913  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
5914  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
5915  F->insert(It, loopMBB);
5916  F->insert(It, exitMBB);
5917  exitMBB->splice(exitMBB->begin(), BB,
5919  BB->end());
5920  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5921 
5922  MachineRegisterInfo &RegInfo = F->getRegInfo();
5923  unsigned TmpReg = (!BinOpcode) ? incr :
5924  RegInfo.createVirtualRegister(
5925  is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
5926  (const TargetRegisterClass *) &PPC::GPRCRegClass);
5927 
5928  // thisMBB:
5929  // ...
5930  // fallthrough --> loopMBB
5931  BB->addSuccessor(loopMBB);
5932 
5933  // loopMBB:
5934  // l[wd]arx dest, ptr
5935  // add r0, dest, incr
5936  // st[wd]cx. r0, ptr
5937  // bne- loopMBB
5938  // fallthrough --> exitMBB
5939  BB = loopMBB;
5940  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
5941  .addReg(ptrA).addReg(ptrB);
5942  if (BinOpcode)
5943  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
5944  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
5945  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
5946  BuildMI(BB, dl, TII->get(PPC::BCC))
5947  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
5948  BB->addSuccessor(loopMBB);
5949  BB->addSuccessor(exitMBB);
5950 
5951  // exitMBB:
5952  // ...
5953  BB = exitMBB;
5954  return BB;
5955 }
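
// Editorial aside (not part of PPCISelLowering.cpp): the l[wd]arx/st[wd]cx.
// loop built above behaves like the following compare-exchange loop. The real
// code uses load-reserve/store-conditional rather than CAS; GCC/Clang atomic
// builtins are used here only as a stand-in, and the helper name is ours.
#include <cstdint>

static uint32_t atomicBinOp(uint32_t *ptr, uint32_t incr,
                            uint32_t (*binOp)(uint32_t, uint32_t)) {
  uint32_t old = __atomic_load_n(ptr, __ATOMIC_RELAXED);    // lwarx dest, ptr
  uint32_t desired;
  do {
    desired = binOp(incr, old);                             // add r0, dest, incr (BinOpcode)
  } while (!__atomic_compare_exchange_n(ptr, &old, desired, /*weak=*/true,
                                        __ATOMIC_RELAXED,   // stwcx. r0, ptr
                                        __ATOMIC_RELAXED)); // bne- loopMBB
  return old;                                               // original value ends up in dest
}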
5956 
5957 MachineBasicBlock *
5958 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
5959  MachineBasicBlock *BB,
5960  bool is8bit, // operation
5961  unsigned BinOpcode) const {
5962  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
5963  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5964  // In 64 bit mode we have to use 64 bits for addresses, even though the
5965  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
5966  // registers without caring whether they're 32 or 64, but here we're
5967  // doing actual arithmetic on the addresses.
5968  bool is64bit = PPCSubTarget.isPPC64();
5969  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
5970 
5971  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5972  MachineFunction *F = BB->getParent();
5973  MachineFunction::iterator It = BB;
5974  ++It;
5975 
5976  unsigned dest = MI->getOperand(0).getReg();
5977  unsigned ptrA = MI->getOperand(1).getReg();
5978  unsigned ptrB = MI->getOperand(2).getReg();
5979  unsigned incr = MI->getOperand(3).getReg();
5980  DebugLoc dl = MI->getDebugLoc();
5981 
5982  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
5983  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
5984  F->insert(It, loopMBB);
5985  F->insert(It, exitMBB);
5986  exitMBB->splice(exitMBB->begin(), BB,
5988  BB->end());
5989  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5990 
5991  MachineRegisterInfo &RegInfo = F->getRegInfo();
5992  const TargetRegisterClass *RC =
5993  is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
5994  (const TargetRegisterClass *) &PPC::GPRCRegClass;
5995  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
5996  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
5997  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
5998  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
5999  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
6000  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
6001  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
6002  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
6003  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
6004  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
6005  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
6006  unsigned Ptr1Reg;
6007  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
6008 
6009  // thisMBB:
6010  // ...
6011  // fallthrough --> loopMBB
6012  BB->addSuccessor(loopMBB);
6013 
6014  // The 4-byte load must be aligned, while a char or short may be
6015  // anywhere in the word. Hence all this nasty bookkeeping code.
6016  // add ptr1, ptrA, ptrB [copy if ptrA==0]
6017  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
6018  // xori shift, shift1, 24 [16]
6019  // rlwinm ptr, ptr1, 0, 0, 29
6020  // slw incr2, incr, shift
6021  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
6022  // slw mask, mask2, shift
6023  // loopMBB:
6024  // lwarx tmpDest, ptr
6025  // add tmp, tmpDest, incr2
6026  // andc tmp2, tmpDest, mask
6027  // and tmp3, tmp, mask
6028  // or tmp4, tmp3, tmp2
6029  // stwcx. tmp4, ptr
6030  // bne- loopMBB
6031  // fallthrough --> exitMBB
6032  // srw dest, tmpDest, shift
6033  if (ptrA != ZeroReg) {
6034  Ptr1Reg = RegInfo.createVirtualRegister(RC);
6035  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
6036  .addReg(ptrA).addReg(ptrB);
6037  } else {
6038  Ptr1Reg = ptrB;
6039  }
6040  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
6041  .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
6042  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
6043  .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
6044  if (is64bit)
6045  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
6046  .addReg(Ptr1Reg).addImm(0).addImm(61);
6047  else
6048  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
6049  .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
6050  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
6051  .addReg(incr).addReg(ShiftReg);
6052  if (is8bit)
6053  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
6054  else {
6055  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
6056  BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
6057  }
6058  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
6059  .addReg(Mask2Reg).addReg(ShiftReg);
6060 
6061  BB = loopMBB;
6062  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
6063  .addReg(ZeroReg).addReg(PtrReg);
6064  if (BinOpcode)
6065  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
6066  .addReg(Incr2Reg).addReg(TmpDestReg);
6067  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
6068  .addReg(TmpDestReg).addReg(MaskReg);
6069  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
6070  .addReg(TmpReg).addReg(MaskReg);
6071  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
6072  .addReg(Tmp3Reg).addReg(Tmp2Reg);
6073  BuildMI(BB, dl, TII->get(PPC::STWCX))
6074  .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
6075  BuildMI(BB, dl, TII->get(PPC::BCC))
6076  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
6077  BB->addSuccessor(loopMBB);
6078  BB->addSuccessor(exitMBB);
6079 
6080  // exitMBB:
6081  // ...
6082  BB = exitMBB;
6083  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
6084  .addReg(ShiftReg);
6085  return BB;
6086 }
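
// Editorial aside (not part of PPCISelLowering.cpp): a scalar sketch of the
// shift/mask bookkeeping from the pseudocode comment above, for the
// big-endian 8-bit case on a 32-bit word; names are ours.
#include <cstdint>

static void partwordParams(uint64_t addr, uint64_t &wordAddr,
                           unsigned &shift, uint32_t &mask) {
  unsigned shift1 = (unsigned)(addr & 0x3) * 8;  // rlwinm shift1, ptr1, 3, 27, 28
  shift = shift1 ^ 24;                           // xori shift, shift1, 24 (big-endian byte order)
  wordAddr = addr & ~UINT64_C(3);                // rlwinm ptr, ptr1, 0, 0, 29
  mask = 255u << shift;                          // li mask2, 255; slw mask, mask2, shift
}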
6087 
6088 MachineBasicBlock *
6089 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
6090  MachineBasicBlock *MBB) const {
6091  DebugLoc DL = MI->getDebugLoc();
6092  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6093 
6094  MachineFunction *MF = MBB->getParent();
6095  MachineRegisterInfo &MRI = MF->getRegInfo();
6096 
6097  const BasicBlock *BB = MBB->getBasicBlock();
6098  MachineFunction::iterator I = MBB;
6099  ++I;
6100 
6101  // Memory Reference
6102  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
6103  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
6104 
6105  unsigned DstReg = MI->getOperand(0).getReg();
6106  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
6107  assert(RC->hasType(MVT::i32) && "Invalid destination!");
6108  unsigned mainDstReg = MRI.createVirtualRegister(RC);
6109  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
6110 
6111  MVT PVT = getPointerTy();
6112  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
6113  "Invalid Pointer Size!");
6114  // For v = setjmp(buf), we generate
6115  //
6116  // thisMBB:
6117  // SjLjSetup mainMBB
6118  // bl mainMBB
6119  // v_restore = 1
6120  // b sinkMBB
6121  //
6122  // mainMBB:
6123  // buf[LabelOffset] = LR
6124  // v_main = 0
6125  //
6126  // sinkMBB:
6127  // v = phi(main, restore)
6128  //
6129 
6130  MachineBasicBlock *thisMBB = MBB;
6131  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
6132  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
6133  MF->insert(I, mainMBB);
6134  MF->insert(I, sinkMBB);
6135 
6136  MachineInstrBuilder MIB;
6137 
6138  // Transfer the remainder of BB and its successor edges to sinkMBB.
6139  sinkMBB->splice(sinkMBB->begin(), MBB,
6140  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
6141  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6142 
6143  // Note that the structure of the jmp_buf used here is not compatible
6144  // with that used by libc, and is not designed to be. Specifically, it
6145  // stores only those 'reserved' registers that LLVM does not otherwise
6146  // understand how to spill. Also, by convention, by the time this
6147  // intrinsic is called, Clang has already stored the frame address in the
6148  // first slot of the buffer and stack address in the third. Following the
6149  // X86 target code, we'll store the jump address in the second slot. We also
6150  // need to save the TOC pointer (R2) to handle jumps between shared
6151  // libraries, and that will be stored in the fourth slot. The thread
6152  // identifier (R13) is not affected.
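  // Summarizing the resulting buffer layout in pointer-sized slots (added
  // note; offsets correspond to the constants defined just below):
  //   buf[0]: frame address      (stored by Clang before we get here)
  //   buf[1]: jump address (LR)  -- LabelOffset, written in mainMBB
  //   buf[2]: stack address      (stored by Clang before we get here)
  //   buf[3]: TOC pointer (R2)   -- TOCOffset, 64-bit SVR4 only
  //   buf[4]: base pointer       -- BPOffset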
6153 
6154  // thisMBB:
6155  const int64_t LabelOffset = 1 * PVT.getStoreSize();
6156  const int64_t TOCOffset = 3 * PVT.getStoreSize();
6157  const int64_t BPOffset = 4 * PVT.getStoreSize();
6158 
6159  // Prepare the IP in a register.
6160  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
6161  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
6162  unsigned BufReg = MI->getOperand(1).getReg();
6163 
6164  if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
6165  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
6166  .addReg(PPC::X2)
6167  .addImm(TOCOffset)
6168  .addReg(BufReg);
6169  MIB.setMemRefs(MMOBegin, MMOEnd);
6170  }
6171 
6172  // Naked functions never have a base pointer, and so we use r1. For all
6173  // other functions, this decision must be delayed until during PEI.
6174  unsigned BaseReg;
6175  if (MF->getFunction()->getAttributes().hasAttribute(
6176  AttributeSet::FunctionIndex, Attribute::Naked))
6177  BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
6178  else
6179  BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
6180 
6181  MIB = BuildMI(*thisMBB, MI, DL,
6182  TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
6183  .addReg(BaseReg)
6184  .addImm(BPOffset)
6185  .addReg(BufReg);
6186  MIB.setMemRefs(MMOBegin, MMOEnd);
6187 
6188  // Setup
6189  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
6190  const PPCRegisterInfo *TRI =
6191  static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
6192  MIB.addRegMask(TRI->getNoPreservedMask());
6193 
6194  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
6195 
6196  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
6197  .addMBB(mainMBB);
6198  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
6199 
6200  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
6201  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
6202 
6203  // mainMBB:
6204  // mainDstReg = 0
6205  MIB = BuildMI(mainMBB, DL,
6206  TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
6207 
6208  // Store IP
6209  if (PPCSubTarget.isPPC64()) {
6210  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
6211  .addReg(LabelReg)
6212  .addImm(LabelOffset)
6213  .addReg(BufReg);
6214  } else {
6215  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
6216  .addReg(LabelReg)
6217  .addImm(LabelOffset)
6218  .addReg(BufReg);
6219  }
6220 
6221  MIB.setMemRefs(MMOBegin, MMOEnd);
6222 
6223  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
6224  mainMBB->addSuccessor(sinkMBB);
6225 
6226  // sinkMBB:
6227  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
6228  TII->get(PPC::PHI), DstReg)
6229  .addReg(mainDstReg).addMBB(mainMBB)
6230  .addReg(restoreDstReg).addMBB(thisMBB);
6231 
6232  MI->eraseFromParent();
6233  return sinkMBB;
6234 }
6235 
6236 MachineBasicBlock *
6237 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
6238  MachineBasicBlock *MBB) const {
6239  DebugLoc DL = MI->getDebugLoc();
6240  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6241 
6242  MachineFunction *MF = MBB->getParent();
6243  MachineRegisterInfo &MRI = MF->getRegInfo();
6244 
6245  // Memory Reference
6246  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
6247  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
6248 
6249  MVT PVT = getPointerTy();
6250  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
6251  "Invalid Pointer Size!");
6252 
6253  const TargetRegisterClass *RC =
6254  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6255  unsigned Tmp = MRI.createVirtualRegister(RC);
6256  // Since FP is only updated here but NOT referenced, it's treated as GPR.
6257  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
6258  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
6259  unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
6260 
6261  MachineInstrBuilder MIB;
6262 
6263  const int64_t LabelOffset = 1 * PVT.getStoreSize();
6264  const int64_t SPOffset = 2 * PVT.getStoreSize();
6265  const int64_t TOCOffset = 3 * PVT.getStoreSize();
6266  const int64_t BPOffset = 4 * PVT.getStoreSize();
6267 
6268  unsigned BufReg = MI->getOperand(0).getReg();
6269 
6270  // Reload FP (the jumped-to function may not have had a
6271  // frame pointer, and if so, then its r31 will be restored
6272  // as necessary).
6273  if (PVT == MVT::i64) {
6274  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
6275  .addImm(0)
6276  .addReg(BufReg);
6277  } else {
6278  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
6279  .addImm(0)
6280  .addReg(BufReg);
6281  }
6282  MIB.setMemRefs(MMOBegin, MMOEnd);
6283 
6284  // Reload IP
6285  if (PVT == MVT::i64) {
6286  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
6287  .addImm(LabelOffset)
6288  .addReg(BufReg);
6289  } else {
6290  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
6291  .addImm(LabelOffset)
6292  .addReg(BufReg);
6293  }
6294  MIB.setMemRefs(MMOBegin, MMOEnd);
6295 
6296  // Reload SP
6297  if (PVT == MVT::i64) {
6298  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
6299  .addImm(SPOffset)
6300  .addReg(BufReg);
6301  } else {
6302  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
6303  .addImm(SPOffset)
6304  .addReg(BufReg);
6305  }
6306  MIB.setMemRefs(MMOBegin, MMOEnd);
6307 
6308  // Reload BP
6309  if (PVT == MVT::i64) {
6310  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
6311  .addImm(BPOffset)
6312  .addReg(BufReg);
6313  } else {
6314  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
6315  .addImm(BPOffset)
6316  .addReg(BufReg);
6317  }
6318  MIB.setMemRefs(MMOBegin, MMOEnd);
6319 
6320  // Reload TOC
6321  if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
6322  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
6323  .addImm(TOCOffset)
6324  .addReg(BufReg);
6325 
6326  MIB.setMemRefs(MMOBegin, MMOEnd);
6327  }
6328 
6329  // Jump
6330  BuildMI(*MBB, MI, DL,
6331  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
6332  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
6333 
6334  MI->eraseFromParent();
6335  return MBB;
6336 }
6337 
6338 MachineBasicBlock *
6339 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
6340  MachineBasicBlock *BB) const {
6341  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
6342  MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
6343  return emitEHSjLjSetJmp(MI, BB);
6344  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
6345  MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
6346  return emitEHSjLjLongJmp(MI, BB);
6347  }
6348 
6349  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6350 
6351  // To "insert" these instructions we actually have to insert their
6352  // control-flow patterns.
6353  const BasicBlock *LLVM_BB = BB->getBasicBlock();
6354  MachineFunction::iterator It = BB;
6355  ++It;
6356 
6357  MachineFunction *F = BB->getParent();
6358 
6359  if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
6360  MI->getOpcode() == PPC::SELECT_CC_I8)) {
6361  SmallVector<MachineOperand, 2> Cond;
6362  Cond.push_back(MI->getOperand(4));
6363  Cond.push_back(MI->getOperand(1));
6364 
6365  DebugLoc dl = MI->getDebugLoc();
6367  TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
6368  Cond, MI->getOperand(2).getReg(),
6369  MI->getOperand(3).getReg());
6370  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
6371  MI->getOpcode() == PPC::SELECT_CC_I8 ||
6372  MI->getOpcode() == PPC::SELECT_CC_F4 ||
6373  MI->getOpcode() == PPC::SELECT_CC_F8 ||
6374  MI->getOpcode() == PPC::SELECT_CC_VRRC) {
6375 
6376 
6377  // The incoming instruction knows the destination vreg to set, the
6378  // condition code register to branch on, the true/false values to
6379  // select between, and a branch opcode to use.
6380 
6381  // thisMBB:
6382  // ...
6383  // TrueVal = ...
6384  // cmpTY ccX, r1, r2
6385  // bCC copy1MBB
6386  // fallthrough --> copy0MBB
6387  MachineBasicBlock *thisMBB = BB;
6388  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
6389  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
6390  unsigned SelectPred = MI->getOperand(4).getImm();
6391  DebugLoc dl = MI->getDebugLoc();
6392  F->insert(It, copy0MBB);
6393  F->insert(It, sinkMBB);
6394 
6395  // Transfer the remainder of BB and its successor edges to sinkMBB.
6396  sinkMBB->splice(sinkMBB->begin(), BB,
6397  llvm::next(MachineBasicBlock::iterator(MI)),
6398  BB->end());
6399  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
6400 
6401  // Next, add the true and fallthrough blocks as its successors.
6402  BB->addSuccessor(copy0MBB);
6403  BB->addSuccessor(sinkMBB);
6404 
6405  BuildMI(BB, dl, TII->get(PPC::BCC))
6406  .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
6407 
6408  // copy0MBB:
6409  // %FalseValue = ...
6410  // # fallthrough to sinkMBB
6411  BB = copy0MBB;
6412 
6413  // Update machine-CFG edges
6414  BB->addSuccessor(sinkMBB);
6415 
6416  // sinkMBB:
6417  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
6418  // ...
6419  BB = sinkMBB;
6420  BuildMI(*BB, BB->begin(), dl,
6421  TII->get(PPC::PHI), MI->getOperand(0).getReg())
6422  .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
6423  .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
6424  }
6425  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
6426  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
6427  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
6428  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
6429  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
6430  BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
6431  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
6432  BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
6433 
6434  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
6435  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
6436  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
6437  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
6438  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
6439  BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
6440  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
6441  BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
6442 
6443  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
6444  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
6445  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
6446  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
6447  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
6448  BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
6449  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
6450  BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
6451 
6452  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
6453  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
6454  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
6455  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
6456  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
6457  BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
6458  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
6459  BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
6460 
6461  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
6462  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
6463  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
6464  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
6465  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
6466  BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
6467  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
6468  BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
6469 
6470  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
6471  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
6472  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
6473  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
6474  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
6475  BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
6476  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
6477  BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
6478 
6479  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
6480  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
6481  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
6482  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
6483  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
6484  BB = EmitAtomicBinary(MI, BB, false, 0);
6485  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
6486  BB = EmitAtomicBinary(MI, BB, true, 0);
6487 
6488  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
6489  MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
6490  bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
6491 
6492  unsigned dest = MI->getOperand(0).getReg();
6493  unsigned ptrA = MI->getOperand(1).getReg();
6494  unsigned ptrB = MI->getOperand(2).getReg();
6495  unsigned oldval = MI->getOperand(3).getReg();
6496  unsigned newval = MI->getOperand(4).getReg();
6497  DebugLoc dl = MI->getDebugLoc();
6498 
6499  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
6500  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
6501  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
6502  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
6503  F->insert(It, loop1MBB);
6504  F->insert(It, loop2MBB);
6505  F->insert(It, midMBB);
6506  F->insert(It, exitMBB);
6507  exitMBB->splice(exitMBB->begin(), BB,
6508  llvm::next(MachineBasicBlock::iterator(MI)),
6509  BB->end());
6510  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
6511 
6512  // thisMBB:
6513  // ...
6514  // fallthrough --> loopMBB
6515  BB->addSuccessor(loop1MBB);
6516 
6517  // loop1MBB:
6518  // l[wd]arx dest, ptr
6519  // cmp[wd] dest, oldval
6520  // bne- midMBB
6521  // loop2MBB:
6522  // st[wd]cx. newval, ptr
6523  // bne- loopMBB
6524  // b exitBB
6525  // midMBB:
6526  // st[wd]cx. dest, ptr
6527  // exitBB:
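  // Added note: the st[wd]cx. in midMBB stores back the value that was just
  // loaded; its only purpose is to cancel the reservation created by the
  // l[wd]arx on the failed-comparison path, and its condition result is
  // deliberately ignored.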
6528  BB = loop1MBB;
6529  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
6530  .addReg(ptrA).addReg(ptrB);
6531  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
6532  .addReg(oldval).addReg(dest);
6533  BuildMI(BB, dl, TII->get(PPC::BCC))
6534  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
6535  BB->addSuccessor(loop2MBB);
6536  BB->addSuccessor(midMBB);
6537 
6538  BB = loop2MBB;
6539  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
6540  .addReg(newval).addReg(ptrA).addReg(ptrB);
6541  BuildMI(BB, dl, TII->get(PPC::BCC))
6542  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
6543  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
6544  BB->addSuccessor(loop1MBB);
6545  BB->addSuccessor(exitMBB);
6546 
6547  BB = midMBB;
6548  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
6549  .addReg(dest).addReg(ptrA).addReg(ptrB);
6550  BB->addSuccessor(exitMBB);
6551 
6552  // exitMBB:
6553  // ...
6554  BB = exitMBB;
6555  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
6556  MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
6557  // We must use 64-bit registers for addresses when targeting 64-bit,
6558  // since we're actually doing arithmetic on them. Other registers
6559  // can be 32-bit.
6560  bool is64bit = PPCSubTarget.isPPC64();
6561  bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
6562 
6563  unsigned dest = MI->getOperand(0).getReg();
6564  unsigned ptrA = MI->getOperand(1).getReg();
6565  unsigned ptrB = MI->getOperand(2).getReg();
6566  unsigned oldval = MI->getOperand(3).getReg();
6567  unsigned newval = MI->getOperand(4).getReg();
6568  DebugLoc dl = MI->getDebugLoc();
6569 
6570  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
6571  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
6572  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
6573  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
6574  F->insert(It, loop1MBB);
6575  F->insert(It, loop2MBB);
6576  F->insert(It, midMBB);
6577  F->insert(It, exitMBB);
6578  exitMBB->splice(exitMBB->begin(), BB,
6579  llvm::next(MachineBasicBlock::iterator(MI)),
6580  BB->end());
6581  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
6582 
6583  MachineRegisterInfo &RegInfo = F->getRegInfo();
6584  const TargetRegisterClass *RC =
6585  is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
6586  (const TargetRegisterClass *) &PPC::GPRCRegClass;
6587  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
6588  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
6589  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
6590  unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
6591  unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
6592  unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
6593  unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
6594  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
6595  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
6596  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
6597  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
6598  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
6599  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
6600  unsigned Ptr1Reg;
6601  unsigned TmpReg = RegInfo.createVirtualRegister(RC);
6602  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
6603  // thisMBB:
6604  // ...
6605  // fallthrough --> loopMBB
6606  BB->addSuccessor(loop1MBB);
6607 
6608  // The 4-byte load must be aligned, while a char or short may be
6609  // anywhere in the word. Hence all this nasty bookkeeping code.
6610  // add ptr1, ptrA, ptrB [copy if ptrA==0]
6611  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
6612  // xori shift, shift1, 24 [16]
6613  // rlwinm ptr, ptr1, 0, 0, 29
6614  // slw newval2, newval, shift
6615  // slw oldval2, oldval,shift
6616  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
6617  // slw mask, mask2, shift
6618  // and newval3, newval2, mask
6619  // and oldval3, oldval2, mask
6620  // loop1MBB:
6621  // lwarx tmpDest, ptr
6622  // and tmp, tmpDest, mask
6623  // cmpw tmp, oldval3
6624  // bne- midMBB
6625  // loop2MBB:
6626  // andc tmp2, tmpDest, mask
6627  // or tmp4, tmp2, newval3
6628  // stwcx. tmp4, ptr
6629  // bne- loop1MBB
6630  // b exitBB
6631  // midMBB:
6632  // stwcx. tmpDest, ptr
6633  // exitBB:
6634  // srw dest, tmpDest, shift
6635  if (ptrA != ZeroReg) {
6636  Ptr1Reg = RegInfo.createVirtualRegister(RC);
6637  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
6638  .addReg(ptrA).addReg(ptrB);
6639  } else {
6640  Ptr1Reg = ptrB;
6641  }
6642  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
6643  .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
6644  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
6645  .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
6646  if (is64bit)
6647  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
6648  .addReg(Ptr1Reg).addImm(0).addImm(61);
6649  else
6650  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
6651  .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
6652  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
6653  .addReg(newval).addReg(ShiftReg);
6654  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
6655  .addReg(oldval).addReg(ShiftReg);
6656  if (is8bit)
6657  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
6658  else {
6659  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
6660  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
6661  .addReg(Mask3Reg).addImm(65535);
6662  }
6663  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
6664  .addReg(Mask2Reg).addReg(ShiftReg);
6665  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
6666  .addReg(NewVal2Reg).addReg(MaskReg);
6667  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
6668  .addReg(OldVal2Reg).addReg(MaskReg);
6669 
6670  BB = loop1MBB;
6671  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
6672  .addReg(ZeroReg).addReg(PtrReg);
6673  BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
6674  .addReg(TmpDestReg).addReg(MaskReg);
6675  BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
6676  .addReg(TmpReg).addReg(OldVal3Reg);
6677  BuildMI(BB, dl, TII->get(PPC::BCC))
6678  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
6679  BB->addSuccessor(loop2MBB);
6680  BB->addSuccessor(midMBB);
6681 
6682  BB = loop2MBB;
6683  BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
6684  .addReg(TmpDestReg).addReg(MaskReg);
6685  BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
6686  .addReg(Tmp2Reg).addReg(NewVal3Reg);
6687  BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
6688  .addReg(ZeroReg).addReg(PtrReg);
6689  BuildMI(BB, dl, TII->get(PPC::BCC))
6690  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
6691  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
6692  BB->addSuccessor(loop1MBB);
6693  BB->addSuccessor(exitMBB);
6694 
6695  BB = midMBB;
6696  BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
6697  .addReg(ZeroReg).addReg(PtrReg);
6698  BB->addSuccessor(exitMBB);
6699 
6700  // exitMBB:
6701  // ...
6702  BB = exitMBB;
6703  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
6704  .addReg(ShiftReg);
6705  } else if (MI->getOpcode() == PPC::FADDrtz) {
6706  // This pseudo performs an FADD with rounding mode temporarily forced
6707  // to round-to-zero. We emit this via custom inserter since the FPSCR
6708  // is not modeled at the SelectionDAG level.
6709  unsigned Dest = MI->getOperand(0).getReg();
6710  unsigned Src1 = MI->getOperand(1).getReg();
6711  unsigned Src2 = MI->getOperand(2).getReg();
6712  DebugLoc dl = MI->getDebugLoc();
6713 
6714  MachineRegisterInfo &RegInfo = F->getRegInfo();
6715  unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
6716 
6717  // Save FPSCR value.
6718  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
6719 
6720  // Set rounding mode to round-to-zero.
6721  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
6722  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
6723 
6724  // Perform addition.
6725  BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
6726 
6727  // Restore FPSCR value.
6728  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
6729  } else {
6730  llvm_unreachable("Unexpected instr type to insert");
6731  }
6732 
6733  MI->eraseFromParent(); // The pseudo instruction is gone now.
6734  return BB;
6735 }
6736 
6737 //===----------------------------------------------------------------------===//
6738 // Target Optimization Hooks
6739 //===----------------------------------------------------------------------===//
6740 
6741 SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
6742  DAGCombinerInfo &DCI) const {
6743  if (DCI.isAfterLegalizeVectorOps())
6744  return SDValue();
6745 
6746  EVT VT = Op.getValueType();
6747 
6748  if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
6749  (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
6750  (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
6751 
6752  // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
6753  // For the reciprocal, we need to find the zero of the function:
6754  // F(X) = A X - 1 [which has a zero at X = 1/A]
6755  // =>
6756  // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
6757  // does not require additional intermediate precision]
6758 
6759  // Convergence is quadratic, so we essentially double the number of digits
6760  // correct after every iteration. The minimum architected relative
6761  // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
6762  // 23 digits and double has 52 digits.
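  // Illustrative example (added): with A = 3 and a 5-bit-accurate initial
  // estimate X_0 = 0.3125, one step gives X_1 = X_0 (2 - A X_0)
  // = 0.3125 * 1.0625 = 0.33203125, and a second step gives
  // X_2 ~= 0.3333282, converging quadratically toward 1/3.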
6763  int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
6764  if (VT.getScalarType() == MVT::f64)
6765  ++Iterations;
6766 
6767  SelectionDAG &DAG = DCI.DAG;
6768  SDLoc dl(Op);
6769 
6770  SDValue FPOne =
6771  DAG.getConstantFP(1.0, VT.getScalarType());
6772  if (VT.isVector()) {
6773  assert(VT.getVectorNumElements() == 4 &&
6774  "Unknown vector type");
6775  FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
6776  FPOne, FPOne, FPOne, FPOne);
6777  }
6778 
6779  SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
6780  DCI.AddToWorklist(Est.getNode());
6781 
6782  // Newton iterations: Est = Est + Est (1 - Arg * Est)
6783  for (int i = 0; i < Iterations; ++i) {
6784  SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
6785  DCI.AddToWorklist(NewEst.getNode());
6786 
6787  NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
6788  DCI.AddToWorklist(NewEst.getNode());
6789 
6790  NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
6791  DCI.AddToWorklist(NewEst.getNode());
6792 
6793  Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
6794  DCI.AddToWorklist(Est.getNode());
6795  }
6796 
6797  return Est;
6798  }
6799 
6800  return SDValue();
6801 }
6802 
6803 SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
6804  DAGCombinerInfo &DCI) const {
6805  if (DCI.isAfterLegalizeVectorOps())
6806  return SDValue();
6807 
6808  EVT VT = Op.getValueType();
6809 
6810  if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
6811  (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
6812  (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
6813 
6814  // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
6815  // For the reciprocal sqrt, we need to find the zero of the function:
6816  // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
6817  // =>
6818  // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
6819  // As a result, we precompute A/2 prior to the iteration loop.
6820 
6821  // Convergence is quadratic, so we essentially double the number of digits
6822  // correct after every iteration. The minimum architected relative
6823  // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
6824  // 23 digits and double has 52 digits.
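  // Added note on the iteration counts chosen below: starting from ~5
  // correct bits, each Newton step roughly doubles the accuracy, so 3
  // iterations give ~40 bits (enough for float) and 4 give ~80 bits (enough
  // for double); with hasRecipPrec() the ~14-bit estimate needs only 1
  // iteration for float and 2 for double, matching the computation below.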
6825  int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
6826  if (VT.getScalarType() == MVT::f64)
6827  ++Iterations;
6828 
6829  SelectionDAG &DAG = DCI.DAG;
6830  SDLoc dl(Op);
6831 
6832  SDValue FPThreeHalves =
6833  DAG.getConstantFP(1.5, VT.getScalarType());
6834  if (VT.isVector()) {
6835  assert(VT.getVectorNumElements() == 4 &&
6836  "Unknown vector type");
6837  FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
6838  FPThreeHalves, FPThreeHalves,
6839  FPThreeHalves, FPThreeHalves);
6840  }
6841 
6842  SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
6843  DCI.AddToWorklist(Est.getNode());
6844 
6845  // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
6846  // this entire sequence requires only one FP constant.
6847  SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
6848  DCI.AddToWorklist(HalfArg.getNode());
6849 
6850  HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
6851  DCI.AddToWorklist(HalfArg.getNode());
6852 
6853  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
6854  for (int i = 0; i < Iterations; ++i) {
6855  SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
6856  DCI.AddToWorklist(NewEst.getNode());
6857 
6858  NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
6859  DCI.AddToWorklist(NewEst.getNode());
6860 
6861  NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
6862  DCI.AddToWorklist(NewEst.getNode());
6863 
6864  Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
6865  DCI.AddToWorklist(Est.getNode());
6866  }
6867 
6868  return Est;
6869  }
6870 
6871  return SDValue();
6872 }
6873 
6874 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
6875 // not enforce equality of the chain operands.
6876 static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
6877  unsigned Bytes, int Dist,
6878  SelectionDAG &DAG) {
6879  EVT VT = LS->getMemoryVT();
6880  if (VT.getSizeInBits() / 8 != Bytes)
6881  return false;
6882 
6883  SDValue Loc = LS->getBasePtr();
6884  SDValue BaseLoc = Base->getBasePtr();
6885  if (Loc.getOpcode() == ISD::FrameIndex) {
6886  if (BaseLoc.getOpcode() != ISD::FrameIndex)
6887  return false;
6888  const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
6889  int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
6890  int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
6891  int FS = MFI->getObjectSize(FI);
6892  int BFS = MFI->getObjectSize(BFI);
6893  if (FS != BFS || FS != (int)Bytes) return false;
6894  return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
6895  }
6896 
6897  // Handle X+C
6898  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
6899  cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
6900  return true;
6901 
6902  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6903  const GlobalValue *GV1 = NULL;
6904  const GlobalValue *GV2 = NULL;
6905  int64_t Offset1 = 0;
6906  int64_t Offset2 = 0;
6907  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
6908  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
6909  if (isGA1 && isGA2 && GV1 == GV2)
6910  return Offset1 == (Offset2 + Dist*Bytes);
6911  return false;
6912 }
6913 
6914 // Return true if there is a nearby consecutive load to the one provided
6915 // (regardless of alignment). We search up and down the chain, looking through
6916 // token factors and other loads (but nothing else). As a result, a true
6917 // result indicates that it is safe to create a new consecutive load adjacent
6918 // to the load provided.
6919 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
6920  SDValue Chain = LD->getChain();
6921  EVT VT = LD->getMemoryVT();
6922 
6923  SmallSet<SDNode *, 16> LoadRoots;
6924  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
6925  SmallSet<SDNode *, 16> Visited;
6926 
6927  // First, search up the chain, branching to follow all token-factor operands.
6928  // If we find a consecutive load, then we're done, otherwise, record all
6929  // nodes just above the top-level loads and token factors.
6930  while (!Queue.empty()) {
6931  SDNode *ChainNext = Queue.pop_back_val();
6932  if (!Visited.insert(ChainNext))
6933  continue;
6934 
6935  if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
6936  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
6937  return true;
6938 
6939  if (!Visited.count(ChainLD->getChain().getNode()))
6940  Queue.push_back(ChainLD->getChain().getNode());
6941  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
6942  for (SDNode::op_iterator O = ChainNext->op_begin(),
6943  OE = ChainNext->op_end(); O != OE; ++O)
6944  if (!Visited.count(O->getNode()))
6945  Queue.push_back(O->getNode());
6946  } else
6947  LoadRoots.insert(ChainNext);
6948  }
6949 
6950  // Second, search down the chain, starting from the top-level nodes recorded
6951  // in the first phase. These top-level nodes are the nodes just above all
6952  // loads and token factors. Starting with their uses, recursively look through
6953  // all loads (just the chain uses) and token factors to find a consecutive
6954  // load.
6955  Visited.clear();
6956  Queue.clear();
6957 
6958  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
6959  IE = LoadRoots.end(); I != IE; ++I) {
6960  Queue.push_back(*I);
6961 
6962  while (!Queue.empty()) {
6963  SDNode *LoadRoot = Queue.pop_back_val();
6964  if (!Visited.insert(LoadRoot))
6965  continue;
6966 
6967  if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
6968  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
6969  return true;
6970 
6971  for (SDNode::use_iterator UI = LoadRoot->use_begin(),
6972  UE = LoadRoot->use_end(); UI != UE; ++UI)
6973  if (((isa<LoadSDNode>(*UI) &&
6974  cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
6975  UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
6976  Queue.push_back(*UI);
6977  }
6978  }
6979 
6980  return false;
6981 }
6982 
6983 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
6984  DAGCombinerInfo &DCI) const {
6985  const TargetMachine &TM = getTargetMachine();
6986  SelectionDAG &DAG = DCI.DAG;
6987  SDLoc dl(N);
6988  switch (N->getOpcode()) {
6989  default: break;
6990  case PPCISD::SHL:
6991  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
6992  if (C->isNullValue()) // 0 << V -> 0.
6993  return N->getOperand(0);
6994  }
6995  break;
6996  case PPCISD::SRL:
6997  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
6998  if (C->isNullValue()) // 0 >>u V -> 0.
6999  return N->getOperand(0);
7000  }
7001  break;
7002  case PPCISD::SRA:
7003  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
7004  if (C->isNullValue() || // 0 >>s V -> 0.
7005  C->isAllOnesValue()) // -1 >>s V -> -1.
7006  return N->getOperand(0);
7007  }
7008  break;
7009  case ISD::FDIV: {
7010  assert(TM.Options.UnsafeFPMath &&
7011  "Reciprocal estimates require UnsafeFPMath");
7012 
7013  if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
7014  SDValue RV =
7015  DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
7016  if (RV.getNode() != 0) {
7017  DCI.AddToWorklist(RV.getNode());
7018  return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
7019  N->getOperand(0), RV);
7020  }
7021  } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
7022  N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
7023  SDValue RV =
7024  DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
7025  DCI);
7026  if (RV.getNode() != 0) {
7027  DCI.AddToWorklist(RV.getNode());
7028  RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
7029  N->getValueType(0), RV);
7030  DCI.AddToWorklist(RV.getNode());
7031  return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
7032  N->getOperand(0), RV);
7033  }
7034  } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
7035  N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
7036  SDValue RV =
7037  DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
7038  DCI);
7039  if (RV.getNode() != 0) {
7040  DCI.AddToWorklist(RV.getNode());
7041  RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
7042  N->getValueType(0), RV,
7043  N->getOperand(1).getOperand(1));
7044  DCI.AddToWorklist(RV.getNode());
7045  return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
7046  N->getOperand(0), RV);
7047  }
7048  }
7049 
7050  SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
7051  if (RV.getNode() != 0) {
7052  DCI.AddToWorklist(RV.getNode());
7053  return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
7054  N->getOperand(0), RV);
7055  }
7056 
7057  }
7058  break;
7059  case ISD::FSQRT: {
7060  assert(TM.Options.UnsafeFPMath &&
7061  "Reciprocal estimates require UnsafeFPMath");
7062 
7063  // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
7064  // reciprocal sqrt.
7065  SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
7066  if (RV.getNode() != 0) {
7067  DCI.AddToWorklist(RV.getNode());
7068  RV = DAGCombineFastRecip(RV, DCI);
7069  if (RV.getNode() != 0) {
7070  // Unfortunately, RV is now NaN if the input was exactly 0. Select out
7071  // this case and force the answer to 0.
7072 
7073  EVT VT = RV.getValueType();
7074 
7075  SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
7076  if (VT.isVector()) {
7077  assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
7078  Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
7079  }
7080 
7081  SDValue ZeroCmp =
7082  DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
7083  N->getOperand(0), Zero, ISD::SETEQ);
7084  DCI.AddToWorklist(ZeroCmp.getNode());
7085  DCI.AddToWorklist(RV.getNode());
7086 
7087  RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
7088  ZeroCmp, Zero, RV);
7089  return RV;
7090  }
7091  }
7092 
7093  }
7094  break;
7095  case ISD::SINT_TO_FP:
7096  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
7097  if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
7098  // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
7099  // We allow the src/dst to be either f32/f64, but the intermediate
7100  // type must be i64.
7101  if (N->getOperand(0).getValueType() == MVT::i64 &&
7102  N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
7103  SDValue Val = N->getOperand(0).getOperand(0);
7104  if (Val.getValueType() == MVT::f32) {
7105  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
7106  DCI.AddToWorklist(Val.getNode());
7107  }
7108 
7109  Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
7110  DCI.AddToWorklist(Val.getNode());
7111  Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
7112  DCI.AddToWorklist(Val.getNode());
7113  if (N->getValueType(0) == MVT::f32) {
7114  Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
7115  DAG.getIntPtrConstant(0));
7116  DCI.AddToWorklist(Val.getNode());
7117  }
7118  return Val;
7119  } else if (N->getOperand(0).getValueType() == MVT::i32) {
7120  // If the intermediate type is i32, we can avoid the load/store here
7121  // too.
7122  }
7123  }
7124  }
7125  break;
7126  case ISD::STORE:
7127  // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
7128  if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
7129  !cast<StoreSDNode>(N)->isTruncatingStore() &&
7130  N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
7131  N->getOperand(1).getValueType() == MVT::i32 &&
7132  N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
7133  SDValue Val = N->getOperand(1).getOperand(0);
7134  if (Val.getValueType() == MVT::f32) {
7135  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
7136  DCI.AddToWorklist(Val.getNode());
7137  }
7138  Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
7139  DCI.AddToWorklist(Val.getNode());
7140 
7141  SDValue Ops[] = {
7142  N->getOperand(0), Val, N->getOperand(2),
7143  DAG.getValueType(N->getOperand(1).getValueType())
7144  };
7145 
7146  Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7147  DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
7148  cast<StoreSDNode>(N)->getMemoryVT(),
7149  cast<StoreSDNode>(N)->getMemOperand());
7150  DCI.AddToWorklist(Val.getNode());
7151  return Val;
7152  }
7153 
7154  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
7155  if (cast<StoreSDNode>(N)->isUnindexed() &&
7156  N->getOperand(1).getOpcode() == ISD::BSWAP &&
7157  N->getOperand(1).getNode()->hasOneUse() &&
7158  (N->getOperand(1).getValueType() == MVT::i32 ||
7159  N->getOperand(1).getValueType() == MVT::i16 ||
7160  (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
7161  TM.getSubtarget<PPCSubtarget>().isPPC64() &&
7162  N->getOperand(1).getValueType() == MVT::i64))) {
7163  SDValue BSwapOp = N->getOperand(1).getOperand(0);
7164  // Do an any-extend to 32-bits if this is a half-word input.
7165  if (BSwapOp.getValueType() == MVT::i16)
7166  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
7167 
7168  SDValue Ops[] = {
7169  N->getOperand(0), BSwapOp, N->getOperand(2),
7170  DAG.getValueType(N->getOperand(1).getValueType())
7171  };
7172  return
7173  DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
7174  Ops, array_lengthof(Ops),
7175  cast<StoreSDNode>(N)->getMemoryVT(),
7176  cast<StoreSDNode>(N)->getMemOperand());
7177  }
7178  break;
7179  case ISD::LOAD: {
7180  LoadSDNode *LD = cast<LoadSDNode>(N);
7181  EVT VT = LD->getValueType(0);
7182  Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
7183  unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
7184  if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
7185  TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
7186  (VT == MVT::v16i8 || VT == MVT::v8i16 ||
7187  VT == MVT::v4i32 || VT == MVT::v4f32) &&
7188  LD->getAlignment() < ABIAlignment) {
7189  // This is a type-legal unaligned Altivec load.
7190  SDValue Chain = LD->getChain();
7191  SDValue Ptr = LD->getBasePtr();
7192 
7193  // This implements the loading of unaligned vectors as described in
7194  // the venerable Apple Velocity Engine overview. Specifically:
7195  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
7196  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
7197  //
7198  // The general idea is to expand a sequence of one or more unaligned
7199  // loads into an alignment-based permutation-control instruction (lvsl),
7200  // a series of regular vector loads (which always truncate their
7201  // input address to an aligned address), and a series of permutations.
7202  // The results of these permutations are the requested loaded values.
7203  // The trick is that the last "extra" load is not taken from the address
7204  // you might suspect (sizeof(vector) bytes after the last requested
7205  // load), but rather sizeof(vector) - 1 bytes after the last
7206  // requested vector. The point of this is to avoid a page fault if the
7207  // base address happened to be aligned. This works because if the base
7208  // address is aligned, then adding less than a full vector length will
7209  // cause the last vector in the sequence to be (re)loaded. Otherwise,
7210  // the next vector will be fetched as you might suspect was necessary.
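  // Sketch of the resulting sequence for one unaligned load from P (added
  // illustration; register names are arbitrary, and the second load's offset
  // is the IncValue chosen below):
  //   lvsl  vPerm, 0, P        ; permute control from the low bits of P
  //   lvx   vLo,   0, P        ; aligned vector covering the first bytes
  //   lvx   vHi,   P, Inc      ; the "extra" load described above
  //   vperm vDest, vLo, vHi, vPerm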
7211 
7212  // We might be able to reuse the permutation generation from
7213  // a different base address offset from this one by an aligned amount.
7214  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
7215  // optimization later.
7216  SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
7217  DAG, dl, MVT::v16i8);
7218 
7219  // Refine the alignment of the original load (a "new" load created here
7220  // which was identical to the first except for the alignment would be
7221  // merged with the existing node regardless).
7222  MachineFunction &MF = DAG.getMachineFunction();
7223  MachineMemOperand *MMO =
7224  MF.getMachineMemOperand(LD->getPointerInfo(),
7225  LD->getMemOperand()->getFlags(),
7226  LD->getMemoryVT().getStoreSize(),
7227  ABIAlignment);
7228  LD->refineAlignment(MMO);
7229  SDValue BaseLoad = SDValue(LD, 0);
7230 
7231  // Note that the value of IncOffset (which is provided to the next
7232  // load's pointer info offset value, and thus used to calculate the
7233  // alignment), and the value of IncValue (which is actually used to
7234  // increment the pointer value) are different! This is because we
7235  // require the next load to appear to be aligned, even though it
7236  // is actually offset from the base pointer by a lesser amount.
7237  int IncOffset = VT.getSizeInBits() / 8;
7238  int IncValue = IncOffset;
7239 
7240  // Walk (both up and down) the chain looking for another load at the real
7241  // (aligned) offset (the alignment of the other load does not matter in
7242  // this case). If found, then do not use the offset reduction trick, as
7243  // that will prevent the loads from being later combined (as they would
7244  // otherwise be duplicates).
7245  if (!findConsecutiveLoad(LD, DAG))
7246  --IncValue;
7247 
7248  SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
7249  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
7250 
7251  SDValue ExtraLoad =
7252  DAG.getLoad(VT, dl, Chain, Ptr,
7253  LD->getPointerInfo().getWithOffset(IncOffset),
7254  LD->isVolatile(), LD->isNonTemporal(),
7255  LD->isInvariant(), ABIAlignment);
7256 
7257  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7258  BaseLoad.getValue(1), ExtraLoad.getValue(1));
7259 
7260  if (BaseLoad.getValueType() != MVT::v4i32)
7261  BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
7262 
7263  if (ExtraLoad.getValueType() != MVT::v4i32)
7264  ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
7265 
7266  SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
7267  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
7268 
7269  if (VT != MVT::v4i32)
7270  Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
7271 
7272  // Now we need to be really careful about how we update the users of the
7273  // original load. We cannot just call DCI.CombineTo (or
7274  // DAG.ReplaceAllUsesWith for that matter), because the load still has
7275  // uses created here (the permutation for example) that need to stay.
7276  SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
7277  while (UI != UE) {
7278  SDUse &Use = UI.getUse();
7279  SDNode *User = *UI;
7280  // Note: BaseLoad is checked here because it might not be N, but a
7281  // bitcast of N.
7282  if (User == Perm.getNode() || User == BaseLoad.getNode() ||
7283  User == TF.getNode() || Use.getResNo() > 1) {
7284  ++UI;
7285  continue;
7286  }
7287 
7288  SDValue To = Use.getResNo() ? TF : Perm;
7289  ++UI;
7290 
7291  SmallVector<SDValue, 8> Ops;
7292  for (SDNode::op_iterator O = User->op_begin(),
7293  OE = User->op_end(); O != OE; ++O) {
7294  if (*O == Use)
7295  Ops.push_back(To);
7296  else
7297  Ops.push_back(*O);
7298  }
7299 
7300  DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
7301  }
7302 
7303  return SDValue(N, 0);
7304  }
7305  }
7306  break;
7307  case ISD::INTRINSIC_WO_CHAIN:
7308  if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
7309  Intrinsic::ppc_altivec_lvsl &&
7310  N->getOperand(1)->getOpcode() == ISD::ADD) {
7311  SDValue Add = N->getOperand(1);
7312 
7313  if (DAG.MaskedValueIsZero(Add->getOperand(1),
7314  APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
7315  Add.getValueType().getScalarType().getSizeInBits()))) {
7316  SDNode *BasePtr = Add->getOperand(0).getNode();
7317  for (SDNode::use_iterator UI = BasePtr->use_begin(),
7318  UE = BasePtr->use_end(); UI != UE; ++UI) {
7319  if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
7320  cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
7321  Intrinsic::ppc_altivec_lvsl) {
7322  // We've found another LVSL, and this address is an aligned
7323  // multiple of that one. The results will be the same, so use the
7324  // one we've just found instead.
7325 
7326  return SDValue(*UI, 0);
7327  }
7328  }
7329  }
7330  }
7331 
7332  break;
7333  case ISD::BSWAP:
7334  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
7335  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7336  N->getOperand(0).hasOneUse() &&
7337  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
7338  (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
7339  TM.getSubtarget<PPCSubtarget>().isPPC64() &&
7340  N->getValueType(0) == MVT::i64))) {
7341  SDValue Load = N->getOperand(0);
7342  LoadSDNode *LD = cast<LoadSDNode>(Load);
7343  // Create the byte-swapping load.
7344  SDValue Ops[] = {
7345  LD->getChain(), // Chain
7346  LD->getBasePtr(), // Ptr
7347  DAG.getValueType(N->getValueType(0)) // VT
7348  };
7349  SDValue BSLoad =
7350  DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
7351  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
7352  MVT::i64 : MVT::i32, MVT::Other),
7353  Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
7354 
7355  // If this is an i16 load, insert the truncate.
7356  SDValue ResVal = BSLoad;
7357  if (N->getValueType(0) == MVT::i16)
7358  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
7359 
7360  // First, combine the bswap away. This makes the value produced by the
7361  // load dead.
7362  DCI.CombineTo(N, ResVal);
7363 
7364  // Next, combine the load away, we give it a bogus result value but a real
7365  // chain result. The result value is dead because the bswap is dead.
7366  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7367 
7368  // Return N so it doesn't get rechecked!
7369  return SDValue(N, 0);
7370  }
7371 
7372  break;
7373  case PPCISD::VCMP: {
7374  // If a VCMPo node already exists with exactly the same operands as this
7375  // node, use its result instead of this node (VCMPo computes both a CR6 and
7376  // a normal output).
7377  //
7378  if (!N->getOperand(0).hasOneUse() &&
7379  !N->getOperand(1).hasOneUse() &&
7380  !N->getOperand(2).hasOneUse()) {
7381 
7382  // Scan all of the users of the LHS, looking for VCMPo's that match.
7383  SDNode *VCMPoNode = 0;
7384 
7385  SDNode *LHSN = N->getOperand(0).getNode();
7386  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
7387  UI != E; ++UI)
7388  if (UI->getOpcode() == PPCISD::VCMPo &&
7389  UI->getOperand(1) == N->getOperand(1) &&
7390  UI->getOperand(2) == N->getOperand(2) &&
7391  UI->getOperand(0) == N->getOperand(0)) {
7392  VCMPoNode = *UI;
7393  break;
7394  }
7395 
7396  // If there is no VCMPo node, or if the flag value has a single use, don't
7397  // transform this.
7398  if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
7399  break;
7400 
7401  // Look at the (necessarily single) use of the flag value. If it has a
7402  // chain, this transformation is more complex. Note that multiple things
7403  // could use the value result, which we should ignore.
7404  SDNode *FlagUser = 0;
7405  for (SDNode::use_iterator UI = VCMPoNode->use_begin();
7406  FlagUser == 0; ++UI) {
7407  assert(UI != VCMPoNode->use_end() && "Didn't find user!");
7408  SDNode *User = *UI;
7409  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
7410  if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
7411  FlagUser = User;
7412  break;
7413  }
7414  }
7415  }
7416 
7417  // If the user is a MFOCRF instruction, we know this is safe.
7418  // Otherwise we give up for right now.
7419  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
7420  return SDValue(VCMPoNode, 0);
7421  }
7422  break;
7423  }
7424  case ISD::BR_CC: {
7425  // If this is a branch on an altivec predicate comparison, lower this so
7426  // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
7427  // lowering is done pre-legalize, because the legalizer lowers the predicate
7428  // compare down to code that is difficult to reassemble.
7429  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
7430  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
7431 
7432  // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
7433  // value. If so, pass-through the AND to get to the intrinsic.
7434  if (LHS.getOpcode() == ISD::AND &&
7435  LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
7436  cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
7437  Intrinsic::ppc_is_decremented_ctr_nonzero &&
7438  isa<ConstantSDNode>(LHS.getOperand(1)) &&
7439  !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
7440  isZero())
7441  LHS = LHS.getOperand(0);
7442 
7443  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
7444  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
7445  Intrinsic::ppc_is_decremented_ctr_nonzero &&
7446  isa<ConstantSDNode>(RHS)) {
7447  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
7448  "Counter decrement comparison is not EQ or NE");
7449 
7450  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
7451  bool isBDNZ = (CC == ISD::SETEQ && Val) ||
7452  (CC == ISD::SETNE && !Val);
7453 
7454  // We now need to make the intrinsic dead (it cannot be instruction
7455  // selected).
7456  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
7457  assert(LHS.getNode()->hasOneUse() &&
7458  "Counter decrement has more than one use");
7459 
7460  return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
7461  N->getOperand(0), N->getOperand(4));
7462  }
7463 
7464  int CompareOpc;
7465  bool isDot;
7466 
7467  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
7468  isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
7469  getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
7470  assert(isDot && "Can't compare against a vector result!");
7471 
7472  // If this is a comparison against something other than 0/1, then we know
7473  // that the condition is never/always true.
7474  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
7475  if (Val != 0 && Val != 1) {
7476  if (CC == ISD::SETEQ) // Cond never true, remove branch.
7477  return N->getOperand(0);
7478  // Always !=, turn it into an unconditional branch.
7479  return DAG.getNode(ISD::BR, dl, MVT::Other,
7480  N->getOperand(0), N->getOperand(4));
7481  }
7482 
7483  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
7484 
7485  // Create the PPCISD altivec 'dot' comparison node.
7486  SDValue Ops[] = {
7487  LHS.getOperand(2), // LHS of compare
7488  LHS.getOperand(3), // RHS of compare
7489  DAG.getConstant(CompareOpc, MVT::i32)
7490  };
7491  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
7492  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
7493 
7494  // Unpack the result based on how the target uses it.
7495  PPC::Predicate CompOpc;
7496  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
7497  default: // Can't happen, don't crash on invalid number though.
7498  case 0: // Branch on the value of the EQ bit of CR6.
7499  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
7500  break;
7501  case 1: // Branch on the inverted value of the EQ bit of CR6.
7502  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
7503  break;
7504  case 2: // Branch on the value of the LT bit of CR6.
7505  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
7506  break;
7507  case 3: // Branch on the inverted value of the LT bit of CR6.
7508  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
7509  break;
7510  }
7511 
7512  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
7513  DAG.getConstant(CompOpc, MVT::i32),
7514  DAG.getRegister(PPC::CR6, MVT::i32),
7515  N->getOperand(4), CompNode.getValue(1));
7516  }
7517  break;
7518  }
7519  }
7520 
7521  return SDValue();
7522 }
7523 
7524 //===----------------------------------------------------------------------===//
7525 // Inline Assembly Support
7526 //===----------------------------------------------------------------------===//
7527 
7528 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
7529  APInt &KnownZero,
7530  APInt &KnownOne,
7531  const SelectionDAG &DAG,
7532  unsigned Depth) const {
7533  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
7534  switch (Op.getOpcode()) {
7535  default: break;
7536  case PPCISD::LBRX: {
7537  // lhbrx is known to have the top bits cleared out.
7538  if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
7539  KnownZero = 0xFFFF0000;
7540  break;
7541  }
7542  case ISD::INTRINSIC_WO_CHAIN: {
7543  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
7544  default: break;
7545  case Intrinsic::ppc_altivec_vcmpbfp_p:
7546  case Intrinsic::ppc_altivec_vcmpeqfp_p:
7547  case Intrinsic::ppc_altivec_vcmpequb_p:
7548  case Intrinsic::ppc_altivec_vcmpequh_p:
7549  case Intrinsic::ppc_altivec_vcmpequw_p:
7550  case Intrinsic::ppc_altivec_vcmpgefp_p:
7551  case Intrinsic::ppc_altivec_vcmpgtfp_p:
7552  case Intrinsic::ppc_altivec_vcmpgtsb_p:
7553  case Intrinsic::ppc_altivec_vcmpgtsh_p:
7554  case Intrinsic::ppc_altivec_vcmpgtsw_p:
7555  case Intrinsic::ppc_altivec_vcmpgtub_p:
7556  case Intrinsic::ppc_altivec_vcmpgtuh_p:
7557  case Intrinsic::ppc_altivec_vcmpgtuw_p:
7558  KnownZero = ~1U; // All bits but the low one are known to be zero.
7559  break;
7560  }
7561  }
7562  }
7563 }
7564 
7565 
7566 /// getConstraintType - Given a constraint, return the type of
7567 /// constraint it is for this target.
7568 PPCTargetLowering::ConstraintType
7569 PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
7570  if (Constraint.size() == 1) {
7571  switch (Constraint[0]) {
7572  default: break;
7573  case 'b':
7574  case 'r':
7575  case 'f':
7576  case 'v':
7577  case 'y':
7578  return C_RegisterClass;
7579  case 'Z':
7580  // FIXME: While Z does indicate a memory constraint, it specifically
7581  // indicates an r+r address (used in conjunction with the 'y' modifier
7582  // in the replacement string). Currently, we're forcing the base
7583  // register to be r0 in the asm printer (which is interpreted as zero)
7584  // and forming the complete address in the second register. This is
7585  // suboptimal.
7586  return C_Memory;
7587  }
7588  }
7589  return TargetLowering::getConstraintType(Constraint);
7590 }
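// Usage illustration (added, not part of the original source): a 'Z' operand
// is normally paired with the '%y' operand modifier so that the instruction
// receives an r+r (indexed) address, e.g. from C code:
//   unsigned v;
//   asm("lwbrx %0, %y1" : "=r"(v) : "Z"(*p));
// where 'p' is a hypothetical pointer to the word being byte-reversed.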
7591 
7592 /// Examine constraint type and operand type and determine a weight value.
7593 /// This object must already have been set up with the operand type
7594 /// and the current alternative constraint selected.
7595 TargetLowering::ConstraintWeight
7596 PPCTargetLowering::getSingleConstraintMatchWeight(
7597  AsmOperandInfo &info, const char *constraint) const {
7598  ConstraintWeight weight = CW_Invalid;
7599  Value *CallOperandVal = info.CallOperandVal;
7600  // If we don't have a value, we can't do a match,
7601  // but allow it at the lowest weight.
7602  if (CallOperandVal == NULL)
7603  return CW_Default;
7604  Type *type = CallOperandVal->getType();
7605  // Look at the constraint type.
7606  switch (*constraint) {
7607  default:
7608  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
7609  break;
7610  case 'b':
7611  if (type->isIntegerTy())
7612  weight = CW_Register;
7613  break;
7614  case 'f':
7615  if (type->isFloatTy())
7616  weight = CW_Register;
7617  break;
7618  case 'd':
7619  if (type->isDoubleTy())
7620  weight = CW_Register;
7621  break;
7622  case 'v':
7623  if (type->isVectorTy())
7624  weight = CW_Register;
7625  break;
7626  case 'y':
7627  weight = CW_Register;
7628  break;
7629  case 'Z':
7630  weight = CW_Memory;
7631  break;
7632  }
7633  return weight;
7634 }
7635 
7636 std::pair<unsigned, const TargetRegisterClass*>
7637 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
7638  MVT VT) const {
7639  if (Constraint.size() == 1) {
7640  // GCC RS6000 Constraint Letters
7641  switch (Constraint[0]) {
7642  case 'b': // R1-R31
7643  if (VT == MVT::i64 && PPCSubTarget.isPPC64())
7644  return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
7645  return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
7646  case 'r': // R0-R31
7647  if (VT == MVT::i64 && PPCSubTarget.isPPC64())
7648  return std::make_pair(0U, &PPC::G8RCRegClass);
7649  return std::make_pair(0U, &PPC::GPRCRegClass);
7650  case 'f':
7651  if (VT == MVT::f32 || VT == MVT::i32)
7652  return std::make_pair(0U, &PPC::F4RCRegClass);
7653  if (VT == MVT::f64 || VT == MVT::i64)
7654  return std::make_pair(0U, &PPC::F8RCRegClass);
7655  break;
7656  case 'v':
7657  return std::make_pair(0U, &PPC::VRRCRegClass);
7658  case 'y': // crrc
7659  return std::make_pair(0U, &PPC::CRRCRegClass);
7660  }
7661  }
7662 
7663  std::pair<unsigned, const TargetRegisterClass*> R =
7664  TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
7665 
7666  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
7667  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
7668  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
7669  // register.
7670  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
7671  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
7672  if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
7673  PPC::GPRCRegClass.contains(R.first)) {
7674  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
7675  return std::make_pair(TRI->getMatchingSuperReg(R.first,
7676  PPC::sub_32, &PPC::G8RCRegClass),
7677  &PPC::G8RCRegClass);
7678  }
7679 
7680  return R;
7681 }
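// A sketch of the 'upgrade' case handled just above: when 64-bit data is
// pinned to a GPR by name, the generic handler returns the 32-bit register
// and the matching 64-bit X register is substituted (ppc64 only; the exact
// constraint string the front end emits and the variable names here are
// illustrative assumptions):
//
//   register long arg asm("r3");                   // named-GPR constraint
//   // __asm__ volatile("sc" : "+r"(arg) :: "memory");
//   // 'arg' is 64 bits wide, so R3 is upgraded to X3 by the code above.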
7682 
7683 
7684 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
7685 /// vector. If it is invalid, don't add anything to Ops.
7686 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
7687  std::string &Constraint,
7688  std::vector<SDValue>&Ops,
7689  SelectionDAG &DAG) const {
7690  SDValue Result(0,0);
7691 
7692  // Only support length 1 constraints.
7693  if (Constraint.length() > 1) return;
7694 
7695  char Letter = Constraint[0];
7696  switch (Letter) {
7697  default: break;
7698  case 'I':
7699  case 'J':
7700  case 'K':
7701  case 'L':
7702  case 'M':
7703  case 'N':
7704  case 'O':
7705  case 'P': {
7706  ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
7707  if (!CST) return; // Must be an immediate to match.
7708  unsigned Value = CST->getZExtValue();
7709  switch (Letter) {
7710  default: llvm_unreachable("Unknown constraint letter!");
7711  case 'I': // "I" is a signed 16-bit constant.
7712  if ((short)Value == (int)Value)
7713  Result = DAG.getTargetConstant(Value, Op.getValueType());
7714  break;
7715  case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
7716  case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
7717  if ((short)Value == 0)
7718  Result = DAG.getTargetConstant(Value, Op.getValueType());
7719  break;
7720  case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
7721  if ((Value >> 16) == 0)
7722  Result = DAG.getTargetConstant(Value, Op.getValueType());
7723  break;
7724  case 'M': // "M" is a constant that is greater than 31.
7725  if (Value > 31)
7726  Result = DAG.getTargetConstant(Value, Op.getValueType());
7727  break;
7728  case 'N': // "N" is a positive constant that is an exact power of two.
7729  if ((int)Value > 0 && isPowerOf2_32(Value))
7730  Result = DAG.getTargetConstant(Value, Op.getValueType());
7731  break;
7732  case 'O': // "O" is the constant zero.
7733  if (Value == 0)
7734  Result = DAG.getTargetConstant(Value, Op.getValueType());
7735  break;
7736  case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
7737  if ((short)-Value == (int)-Value)
7738  Result = DAG.getTargetConstant(Value, Op.getValueType());
7739  break;
7740  }
7741  break;
7742  }
7743  }
7744 
7745  if (Result.getNode()) {
7746  Ops.push_back(Result);
7747  return;
7748  }
7749 
7750  // Handle standard constraint letters.
7751  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
7752 }
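// A minimal sketch of the immediate-constraint letters handled above, as
// they might appear in user inline asm (illustrative; assumes GCC/Clang
// PowerPC inline-asm syntax):
//
//   int add_small(int a) {
//     int r;
//     // "I": signed 16-bit constant, matching addi's immediate field.
//     __asm__("addi %0, %1, %2" : "=r"(r) : "r"(a), "I"(42));
//     return r;
//   }
//
//   unsigned set_low_bits(unsigned a) {
//     unsigned r;
//     // "K": constant with only the low-order 16 bits set, as ori expects.
//     __asm__("ori %0, %1, %2" : "=r"(r) : "r"(a), "K"(0x00ff));
//     return r;
//   }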
7753 
7754 // isLegalAddressingMode - Return true if the addressing mode represented
7755 // by AM is legal for this target, for a load/store of the specified type.
7756 bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
7757  Type *Ty) const {
7758  // FIXME: PPC does not allow r+i addressing modes for vectors!
7759 
7760  // PPC allows a sign-extended 16-bit immediate field.
7761  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
7762  return false;
7763 
7764  // No global is ever allowed as a base.
7765  if (AM.BaseGV)
7766  return false;
7767 
7768  // PPC only supports r+r,
7769  switch (AM.Scale) {
7770  case 0: // "r+i" or just "i", depending on HasBaseReg.
7771  break;
7772  case 1:
7773  if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
7774  return false;
7775  // Otherwise we have r+r or r+i.
7776  break;
7777  case 2:
7778  if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
7779  return false;
7780  // Allow 2*r as r+r.
7781  break;
7782  default:
7783  // No other scales are supported.
7784  return false;
7785  }
7786 
7787  return true;
7788 }
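// For reference, typical access shapes this hook is queried with, and how
// they map to the AddrMode checks above (a sketch; the function below is
// illustrative, not from this file):
//
//   long f(long *p, long i) {
//     long a = p[3];   // base reg + 24:  Scale 0, small BaseOffs -> legal (D-form)
//     long b = p[i];   // base reg + reg: Scale 1, no offset      -> legal (X-form)
//     return a + b;    // base reg + reg + imm would be rejected above
//   }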
7789 
7790 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
7791  SelectionDAG &DAG) const {
7792  MachineFunction &MF = DAG.getMachineFunction();
7793  MachineFrameInfo *MFI = MF.getFrameInfo();
7794  MFI->setReturnAddressIsTaken(true);
7795 
7796  SDLoc dl(Op);
7797  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7798 
7799  // Make sure the function does not optimize away the store of the RA to
7800  // the stack.
7801  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7802  FuncInfo->setLRStoreRequired();
7803  bool isPPC64 = PPCSubTarget.isPPC64();
7804  bool isDarwinABI = PPCSubTarget.isDarwinABI();
7805 
7806  if (Depth > 0) {
7807  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
7808  SDValue Offset =
7809 
7810  DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
7811  isPPC64? MVT::i64 : MVT::i32);
7812  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
7813  DAG.getNode(ISD::ADD, dl, getPointerTy(),
7814  FrameAddr, Offset),
7815  MachinePointerInfo(), false, false, false, 0);
7816  }
7817 
7818  // Just load the return address off the stack.
7819  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
7820  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
7821  RetAddrFI, MachinePointerInfo(), false, false, false, 0);
7822 }
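// ISD::RETURNADDR is produced for the @llvm.returnaddress intrinsic, which
// front ends emit for __builtin_return_address. A source-level sketch of
// what the code above lowers (Depth 0 loads the saved LR directly; deeper
// frames go through the frame chain and the return-save offset):
//
//   void *who_called_me(void) {
//     return __builtin_return_address(0);
//   }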
7823 
7824 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
7825  SelectionDAG &DAG) const {
7826  SDLoc dl(Op);
7827  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7828 
7829  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
7830  bool isPPC64 = PtrVT == MVT::i64;
7831 
7832  MachineFunction &MF = DAG.getMachineFunction();
7833  MachineFrameInfo *MFI = MF.getFrameInfo();
7834  MFI->setFrameAddressIsTaken(true);
7835 
7836  // Naked functions never have a frame pointer, and so we use r1. For all
7837  // other functions, this decision must be delayed until PEI.
7838  unsigned FrameReg;
7839  if (MF.getFunction()->getAttributes().hasAttribute(
7840  AttributeSet::FunctionIndex, Attribute::Naked))
7841  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
7842  else
7843  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
7844 
7845  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
7846  PtrVT);
7847  while (Depth--)
7848  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
7849  FrameAddr, MachinePointerInfo(), false, false,
7850  false, 0);
7851  return FrameAddr;
7852 }
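// Likewise, ISD::FRAMEADDR comes from @llvm.frameaddress (emitted for
// __builtin_frame_address); each additional Depth level adds one load
// through the saved back chain, as in the loop above. Illustrative usage:
//
//   void *my_frame(void)      { return __builtin_frame_address(0); }
//   void *callers_frame(void) { return __builtin_frame_address(1); }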
7853 
7854 bool
7855 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
7856  // The PowerPC target isn't yet aware of offsets.
7857  return false;
7858 }
7859 
7860 /// getOptimalMemOpType - Returns the target specific optimal type for load
7861 /// and store operations as a result of memset, memcpy, and memmove
7862 /// lowering. If DstAlign is zero, the destination alignment can satisfy any
7863 /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
7864 /// against an alignment requirement, probably because the source does not
7865 /// need to be loaded. If 'IsMemset' is
7866 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
7867 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
7868 /// source is constant so it does not need to be loaded.
7869 /// It returns EVT::Other if the type should be determined using generic
7870 /// target-independent logic.
7871 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
7872  unsigned DstAlign, unsigned SrcAlign,
7873  bool IsMemset, bool ZeroMemset,
7874  bool MemcpyStrSrc,
7875  MachineFunction &MF) const {
7876  if (this->PPCSubTarget.isPPC64()) {
7877  return MVT::i64;
7878  } else {
7879  return MVT::i32;
7880  }
7881 }
7882 
7883 bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
7884  bool *Fast) const {
7885  if (DisablePPCUnaligned)
7886  return false;
7887 
7888  // PowerPC supports unaligned memory access for simple non-vector types.
7889  // Although accessing unaligned addresses is not as efficient as accessing
7890  // aligned addresses, it is generally more efficient than manual expansion,
7891  // and generally only traps for software emulation when crossing page
7892  // boundaries.
7893 
7894  if (!VT.isSimple())
7895  return false;
7896 
7897  if (VT.getSimpleVT().isVector())
7898  return false;
7899 
7900  if (VT == MVT::ppcf128)
7901  return false;
7902 
7903  if (Fast)
7904  *Fast = true;
7905 
7906  return true;
7907 }
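// With this hook returning true for simple scalar types, a misaligned access
// like the one below is selected as a single load rather than a byte-wise
// expansion (a sketch; the packed struct is illustrative):
//
//   struct __attribute__((packed)) Rec { char tag; unsigned value; };
//   unsigned read_value(const struct Rec *r) {
//     return r->value; // 4-byte load at offset 1, possibly unaligned
//   }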
7908 
7909 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
7910  VT = VT.getScalarType();
7911 
7912  if (!VT.isSimple())
7913  return false;
7914 
7915  switch (VT.getSimpleVT().SimpleTy) {
7916  case MVT::f32:
7917  case MVT::f64:
7918  return true;
7919  default:
7920  break;
7921  }
7922 
7923  return false;
7924 }
7925 
7926 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
7927  if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
7928  return TargetLowering::getSchedulingPreference(N);
7929 
7930  return Sched::ILP;
7931 }
7932 
7933 // Create a fast isel object.
7934 FastISel *
7935 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
7936  const TargetLibraryInfo *LibInfo) const {
7937  return PPC::createFastISel(FuncInfo, LibInfo);
7938 }