X86FastISel.cpp
1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the X86-specific support for the FastISel class. Much
11 // of the target-specific code is generated by tablegen in the file
12 // X86GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "X86.h"
17 #include "X86CallingConv.h"
18 #include "X86ISelLowering.h"
19 #include "X86InstrBuilder.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
23 #include "llvm/CodeGen/Analysis.h"
24 #include "llvm/CodeGen/FastISel.h"
25 #include "llvm/CodeGen/FunctionLoweringInfo.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFrameInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallingConv.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/GlobalAlias.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/IntrinsicInst.h"
35 #include "llvm/IR/Operator.h"
36 #include "llvm/Support/CallSite.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/GetElementPtrTypeIterator.h"
39 #include "llvm/Target/TargetOptions.h"
40 using namespace llvm;
41 
42 namespace {
43 
44 class X86FastISel : public FastISel {
45  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
46  /// make the right decision when generating code for different targets.
47  const X86Subtarget *Subtarget;
48 
49  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
50  /// floating point ops.
51  /// When SSE is available, use it for f32 operations.
52  /// When SSE2 is available, use it for f64 operations.
53  bool X86ScalarSSEf64;
54  bool X86ScalarSSEf32;
55 
56 public:
57  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
58  const TargetLibraryInfo *libInfo)
59  : FastISel(funcInfo, libInfo) {
60  Subtarget = &TM.getSubtarget<X86Subtarget>();
61  X86ScalarSSEf64 = Subtarget->hasSSE2();
62  X86ScalarSSEf32 = Subtarget->hasSSE1();
63  }
64 
65  virtual bool TargetSelectInstruction(const Instruction *I);
66 
67  /// \brief The specified machine instr operand is a vreg, and that
68  /// vreg is being provided by the specified load instruction. If possible,
69  /// try to fold the load as an operand to the instruction, returning true on
70  /// success.
71  virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
72  const LoadInst *LI);
73 
74  virtual bool FastLowerArguments();
75 
76 #include "X86GenFastISel.inc"
77 
78 private:
79  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
80 
81  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
82 
83  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
84  bool Aligned = false);
85  bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
86  bool Aligned = false);
87 
88  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
89  unsigned &ResultReg);
90 
91  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
92  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
93 
94  bool X86SelectLoad(const Instruction *I);
95 
96  bool X86SelectStore(const Instruction *I);
97 
98  bool X86SelectRet(const Instruction *I);
99 
100  bool X86SelectCmp(const Instruction *I);
101 
102  bool X86SelectZExt(const Instruction *I);
103 
104  bool X86SelectBranch(const Instruction *I);
105 
106  bool X86SelectShift(const Instruction *I);
107 
108  bool X86SelectDivRem(const Instruction *I);
109 
110  bool X86SelectSelect(const Instruction *I);
111 
112  bool X86SelectTrunc(const Instruction *I);
113 
114  bool X86SelectFPExt(const Instruction *I);
115  bool X86SelectFPTrunc(const Instruction *I);
116 
117  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
118  bool X86SelectCall(const Instruction *I);
119 
120  bool DoSelectCall(const Instruction *I, const char *MemIntName);
121 
122  const X86InstrInfo *getInstrInfo() const {
123  return getTargetMachine()->getInstrInfo();
124  }
125  const X86TargetMachine *getTargetMachine() const {
126  return static_cast<const X86TargetMachine *>(&TM);
127  }
128 
129  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
130 
131  unsigned TargetMaterializeConstant(const Constant *C);
132 
133  unsigned TargetMaterializeAlloca(const AllocaInst *C);
134 
135  unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
136 
137  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
138  /// computed in an SSE register, not on the X87 floating point stack.
139  bool isScalarFPTypeInSSEReg(EVT VT) const {
140  return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
141  (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
142  }
143 
144  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
145 
146  bool IsMemcpySmall(uint64_t Len);
147 
148  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
149  X86AddressMode SrcAM, uint64_t Len);
150 };
151 
152 } // end anonymous namespace.
153 
154 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
155  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
156  if (evt == MVT::Other || !evt.isSimple())
157  // Unhandled type. Halt "fast" selection and bail.
158  return false;
159 
160  VT = evt.getSimpleVT();
161  // For now, require SSE/SSE2 for performing floating-point operations,
162  // since x87 requires additional work.
163  if (VT == MVT::f64 && !X86ScalarSSEf64)
164  return false;
165  if (VT == MVT::f32 && !X86ScalarSSEf32)
166  return false;
167  // Similarly, no f80 support yet.
168  if (VT == MVT::f80)
169  return false;
170  // We only handle legal types. For example, on x86-32 the instruction
171  // selector contains all of the 64-bit instructions from x86-64,
172  // under the assumption that i64 won't be used if the target doesn't
173  // support it.
174  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
175 }
176 
177 #include "X86GenCallingConv.inc"
178 
179 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
180 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
181 /// Return true and the result register by reference if it is possible.
182 bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
183  unsigned &ResultReg) {
184  // Get opcode and regclass of the output for the given load instruction.
185  unsigned Opc = 0;
186  const TargetRegisterClass *RC = NULL;
187  switch (VT.getSimpleVT().SimpleTy) {
188  default: return false;
189  case MVT::i1:
190  case MVT::i8:
191  Opc = X86::MOV8rm;
192  RC = &X86::GR8RegClass;
193  break;
194  case MVT::i16:
195  Opc = X86::MOV16rm;
196  RC = &X86::GR16RegClass;
197  break;
198  case MVT::i32:
199  Opc = X86::MOV32rm;
200  RC = &X86::GR32RegClass;
201  break;
202  case MVT::i64:
203  // Must be in x86-64 mode.
204  Opc = X86::MOV64rm;
205  RC = &X86::GR64RegClass;
206  break;
207  case MVT::f32:
208  if (X86ScalarSSEf32) {
209  Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
210  RC = &X86::FR32RegClass;
211  } else {
212  Opc = X86::LD_Fp32m;
213  RC = &X86::RFP32RegClass;
214  }
215  break;
216  case MVT::f64:
217  if (X86ScalarSSEf64) {
218  Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
219  RC = &X86::FR64RegClass;
220  } else {
221  Opc = X86::LD_Fp64m;
222  RC = &X86::RFP64RegClass;
223  }
224  break;
225  case MVT::f80:
226  // No f80 support yet.
227  return false;
228  }
229 
230  ResultReg = createResultReg(RC);
231  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
232  DL, TII.get(Opc), ResultReg), AM);
233  return true;
234 }
235 
236 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
237 /// type VT. The address is either pre-computed, consisting of a base ptr
238 /// and a displacement offset, or a GlobalAddress,
239 /// i.e. V. Return true if it is possible.
240 bool
241 X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
242  const X86AddressMode &AM, bool Aligned) {
243  // Get opcode and regclass of the output for the given store instruction.
244  unsigned Opc = 0;
245  switch (VT.getSimpleVT().SimpleTy) {
246  case MVT::f80: // No f80 support yet.
247  default: return false;
248  case MVT::i1: {
249  // Mask out all but lowest bit.
250  unsigned AndResult = createResultReg(&X86::GR8RegClass);
251  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
252  TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
253  ValReg = AndResult;
254  }
255  // FALLTHROUGH, handling i1 as i8.
256  case MVT::i8: Opc = X86::MOV8mr; break;
257  case MVT::i16: Opc = X86::MOV16mr; break;
258  case MVT::i32: Opc = X86::MOV32mr; break;
259  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
260  case MVT::f32:
261  Opc = X86ScalarSSEf32 ?
262  (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
263  break;
264  case MVT::f64:
265  Opc = X86ScalarSSEf64 ?
266  (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
267  break;
268  case MVT::v4f32:
269  if (Aligned)
270  Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
271  else
272  Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
273  break;
274  case MVT::v2f64:
275  if (Aligned)
276  Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
277  else
278  Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
279  break;
280  case MVT::v4i32:
281  case MVT::v2i64:
282  case MVT::v8i16:
283  case MVT::v16i8:
284  if (Aligned)
285  Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
286  else
287  Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
288  break;
289  }
290 
291  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
292  DL, TII.get(Opc)), AM).addReg(ValReg);
293  return true;
294 }
295 
296 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
297  const X86AddressMode &AM, bool Aligned) {
298  // Handle 'null' like i32/i64 0.
299  if (isa<ConstantPointerNull>(Val))
300  Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
301 
302  // If this is a store of a simple constant, fold the constant into the store.
303  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
304  unsigned Opc = 0;
305  bool Signed = true;
306  switch (VT.getSimpleVT().SimpleTy) {
307  default: break;
308  case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
309  case MVT::i8: Opc = X86::MOV8mi; break;
310  case MVT::i16: Opc = X86::MOV16mi; break;
311  case MVT::i32: Opc = X86::MOV32mi; break;
312  case MVT::i64:
313  // Must be a 32-bit sign extended value.
314  if (isInt<32>(CI->getSExtValue()))
315  Opc = X86::MOV64mi32;
316  break;
317  }
318 
319  if (Opc) {
320  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
321  DL, TII.get(Opc)), AM)
322  .addImm(Signed ? (uint64_t) CI->getSExtValue() :
323  CI->getZExtValue());
324  return true;
325  }
326  }
327 
328  unsigned ValReg = getRegForValue(Val);
329  if (ValReg == 0)
330  return false;
331 
332  return X86FastEmitStore(VT, ValReg, AM, Aligned);
333 }
334 
335 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
336 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
337 /// ISD::SIGN_EXTEND).
338 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
339  unsigned Src, EVT SrcVT,
340  unsigned &ResultReg) {
341  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
342  Src, /*TODO: Kill=*/false);
343  if (RR == 0)
344  return false;
345 
346  ResultReg = RR;
347  return true;
348 }
349 
350 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
351  // Handle constant address.
352  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
353  // Can't handle alternate code models yet.
354  if (TM.getCodeModel() != CodeModel::Small)
355  return false;
356 
357  // Can't handle TLS yet.
358  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
359  if (GVar->isThreadLocal())
360  return false;
361 
362  // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
363  // it works...).
364  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
365  if (const GlobalVariable *GVar =
366  dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
367  if (GVar->isThreadLocal())
368  return false;
369 
370  // RIP-relative addresses can't have additional register operands, so if
371  // we've already folded stuff into the addressing mode, just force the
372  // global value into its own register, which we can use as the basereg.
373  if (!Subtarget->isPICStyleRIPRel() ||
374  (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
375  // Okay, we've committed to selecting this global. Set up the address.
376  AM.GV = GV;
377 
378  // Allow the subtarget to classify the global.
379  unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
380 
381  // If this reference is relative to the pic base, set it now.
382  if (isGlobalRelativeToPICBase(GVFlags)) {
383  // FIXME: How do we know Base.Reg is free??
384  AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
385  }
386 
387  // Unless the ABI requires an extra load, return a direct reference to
388  // the global.
389  if (!isGlobalStubReference(GVFlags)) {
390  if (Subtarget->isPICStyleRIPRel()) {
391  // Use rip-relative addressing if we can. Above we verified that the
392  // base and index registers are unused.
393  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
394  AM.Base.Reg = X86::RIP;
395  }
396  AM.GVOpFlags = GVFlags;
397  return true;
398  }
399 
400  // Ok, we need to do a load from a stub. If we've already loaded from
401  // this stub, reuse the loaded pointer, otherwise emit the load now.
402  DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
403  unsigned LoadReg;
404  if (I != LocalValueMap.end() && I->second != 0) {
405  LoadReg = I->second;
406  } else {
407  // Issue load from stub.
408  unsigned Opc = 0;
409  const TargetRegisterClass *RC = NULL;
410  X86AddressMode StubAM;
411  StubAM.Base.Reg = AM.Base.Reg;
412  StubAM.GV = GV;
413  StubAM.GVOpFlags = GVFlags;
414 
415  // Prepare for inserting code in the local-value area.
416  SavePoint SaveInsertPt = enterLocalValueArea();
417 
418  if (TLI.getPointerTy() == MVT::i64) {
419  Opc = X86::MOV64rm;
420  RC = &X86::GR64RegClass;
421 
422  if (Subtarget->isPICStyleRIPRel())
423  StubAM.Base.Reg = X86::RIP;
424  } else {
425  Opc = X86::MOV32rm;
426  RC = &X86::GR32RegClass;
427  }
428 
429  LoadReg = createResultReg(RC);
430  MachineInstrBuilder LoadMI =
431  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
432  addFullAddress(LoadMI, StubAM);
433 
434  // Ok, back to normal mode.
435  leaveLocalValueArea(SaveInsertPt);
436 
437  // Prevent loading GV stub multiple times in same MBB.
438  LocalValueMap[V] = LoadReg;
439  }
440 
441  // Now construct the final address. Note that the Disp, Scale,
442  // and Index values may already be set here.
443  AM.Base.Reg = LoadReg;
444  AM.GV = 0;
445  return true;
446  }
447  }
448 
449  // If all else fails, try to materialize the value in a register.
450  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
451  if (AM.Base.Reg == 0) {
452  AM.Base.Reg = getRegForValue(V);
453  return AM.Base.Reg != 0;
454  }
455  if (AM.IndexReg == 0) {
456  assert(AM.Scale == 1 && "Scale with no index!");
457  AM.IndexReg = getRegForValue(V);
458  return AM.IndexReg != 0;
459  }
460  }
461 
462  return false;
463 }
464 
465 /// X86SelectAddress - Attempt to fill in an address from the given value.
466 ///
467 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
468  SmallVector<const Value *, 32> GEPs;
469 redo_gep:
470  const User *U = NULL;
471  unsigned Opcode = Instruction::UserOp1;
472  if (const Instruction *I = dyn_cast<Instruction>(V)) {
473  // Don't walk into other basic blocks; it's possible we haven't
474  // visited them yet, so the instructions may not yet be assigned
475  // virtual registers.
476  if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
477  FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
478  Opcode = I->getOpcode();
479  U = I;
480  }
481  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
482  Opcode = C->getOpcode();
483  U = C;
484  }
485 
486  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
487  if (Ty->getAddressSpace() > 255)
488  // Fast instruction selection doesn't support the special
489  // address spaces.
490  return false;
491 
492  switch (Opcode) {
493  default: break;
494  case Instruction::BitCast:
495  // Look past bitcasts.
496  return X86SelectAddress(U->getOperand(0), AM);
497 
498  case Instruction::IntToPtr:
499  // Look past no-op inttoptrs.
500  if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
501  return X86SelectAddress(U->getOperand(0), AM);
502  break;
503 
504  case Instruction::PtrToInt:
505  // Look past no-op ptrtoints.
506  if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
507  return X86SelectAddress(U->getOperand(0), AM);
508  break;
509 
510  case Instruction::Alloca: {
511  // Do static allocas.
512  const AllocaInst *A = cast<AllocaInst>(V);
513  DenseMap<const AllocaInst*, int>::iterator SI =
514  FuncInfo.StaticAllocaMap.find(A);
515  if (SI != FuncInfo.StaticAllocaMap.end()) {
516  AM.BaseType = X86AddressMode::FrameIndexBase;
517  AM.Base.FrameIndex = SI->second;
518  return true;
519  }
520  break;
521  }
522 
523  case Instruction::Add: {
524  // Adds of constants are common and easy enough.
525  if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
526  uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
527  // They have to fit in the 32-bit signed displacement field though.
528  if (isInt<32>(Disp)) {
529  AM.Disp = (uint32_t)Disp;
530  return X86SelectAddress(U->getOperand(0), AM);
531  }
532  }
533  break;
534  }
535 
536  case Instruction::GetElementPtr: {
537  X86AddressMode SavedAM = AM;
538 
539  // Pattern-match simple GEPs.
540  uint64_t Disp = (int32_t)AM.Disp;
541  unsigned IndexReg = AM.IndexReg;
542  unsigned Scale = AM.Scale;
543  gep_type_iterator GTI = gep_type_begin(U);
544  // Iterate through the indices, folding what we can. Constants can be
545  // folded, and one dynamic index can be handled, if the scale is supported.
546  for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
547  i != e; ++i, ++GTI) {
548  const Value *Op = *i;
549  if (StructType *STy = dyn_cast<StructType>(*GTI)) {
550  const StructLayout *SL = TD.getStructLayout(STy);
551  Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
552  continue;
553  }
554 
555  // An array/variable index is always of the form i*S where S is the
556  // constant scale size. See if we can push the scale into immediates.
557  uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
558  for (;;) {
559  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
560  // Constant-offset addressing.
561  Disp += CI->getSExtValue() * S;
562  break;
563  }
564  if (canFoldAddIntoGEP(U, Op)) {
565  // A compatible add with a constant operand. Fold the constant.
566  ConstantInt *CI =
567  cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
568  Disp += CI->getSExtValue() * S;
569  // Iterate on the other operand.
570  Op = cast<AddOperator>(Op)->getOperand(0);
571  continue;
572  }
573  if (IndexReg == 0 &&
574  (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
575  (S == 1 || S == 2 || S == 4 || S == 8)) {
576  // Scaled-index addressing.
577  Scale = S;
578  IndexReg = getRegForGEPIndex(Op).first;
579  if (IndexReg == 0)
580  return false;
581  break;
582  }
583  // Unsupported.
584  goto unsupported_gep;
585  }
586  }
587 
588  // Check for displacement overflow.
589  if (!isInt<32>(Disp))
590  break;
591 
592  AM.IndexReg = IndexReg;
593  AM.Scale = Scale;
594  AM.Disp = (uint32_t)Disp;
595  GEPs.push_back(V);
596 
597  if (const GetElementPtrInst *GEP =
598  dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
599  // Ok, the GEP indices were covered by constant-offset and scaled-index
600  // addressing. Update the address state and move on to examining the base.
601  V = GEP;
602  goto redo_gep;
603  } else if (X86SelectAddress(U->getOperand(0), AM)) {
604  return true;
605  }
606 
607  // If we couldn't merge the gep value into this addr mode, revert back to
608  // our address and just match the value instead of completely failing.
609  AM = SavedAM;
610 
611  for (SmallVectorImpl<const Value *>::reverse_iterator
612  I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
613  if (handleConstantAddresses(*I, AM))
614  return true;
615 
616  return false;
617  unsupported_gep:
618  // Ok, the GEP indices weren't all covered.
619  break;
620  }
621  }
622 
623  return handleConstantAddresses(V, AM);
624 }
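
Editor's note: to make the GEP-folding loop above more concrete, here is a minimal, self-contained sketch of how constant struct/array indices collapse into a single 32-bit displacement while at most one dynamic index becomes a scaled index register. It is not part of X86FastISel.cpp; the names FoldedAddress, GepIndex, and foldConstantIndices are invented for illustration.

// Standalone illustration of the constant-offset folding done in
// X86SelectAddress.  All names here are invented for the example.
#include <cstdint>
#include <iostream>
#include <vector>

struct FoldedAddress {
  int64_t Disp = 0;     // accumulated constant displacement
  unsigned Scale = 1;   // scale for the single dynamic index, if any
  bool HasIndex = false;
};

// Each GEP index is either a compile-time constant or a dynamic value.
struct GepIndex {
  bool IsConstant;
  int64_t ConstValue;   // used when IsConstant (index, or byte offset for fields)
  uint64_t ElemSize;    // size in bytes of the indexed element type
};

// Returns false when the address cannot be expressed as
// base + index*scale + disp32 (the x86 addressing-mode limit).
bool foldConstantIndices(const std::vector<GepIndex> &Indices,
                         FoldedAddress &AM) {
  for (const GepIndex &I : Indices) {
    if (I.IsConstant) {
      AM.Disp += I.ConstValue * int64_t(I.ElemSize);
      continue;
    }
    // Only one dynamic index is allowed, and only with a scale of
    // 1, 2, 4 or 8 -- the scales the SIB byte can encode.
    uint64_t S = I.ElemSize;
    if (AM.HasIndex || !(S == 1 || S == 2 || S == 4 || S == 8))
      return false;
    AM.HasIndex = true;
    AM.Scale = unsigned(S);
  }
  // The displacement must fit in a signed 32-bit field.
  return AM.Disp >= INT32_MIN && AM.Disp <= INT32_MAX;
}

int main() {
  // e.g. p[3].field at byte offset 8 inside a 24-byte struct, plus i*4:
  std::vector<GepIndex> Indices = {
      {true, 3, 24},   // array index 3, element size 24
      {true, 8, 1},    // struct field at byte offset 8
      {false, 0, 4}};  // dynamic i32 index
  FoldedAddress AM;
  if (foldConstantIndices(Indices, AM))
    std::cout << "disp=" << AM.Disp << " scale=" << AM.Scale << "\n";
}
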
625 
626 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
627 ///
628 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
629  const User *U = NULL;
630  unsigned Opcode = Instruction::UserOp1;
631  const Instruction *I = dyn_cast<Instruction>(V);
632  // Record if the value is defined in the same basic block.
633  //
634  // This information is crucial to know whether or not folding an
635  // operand is valid.
636  // Indeed, FastISel generates or reuses a virtual register for all
637  // operands of all instructions it selects. Obviously, the definition and
638  // its uses must use the same virtual register otherwise the produced
639  // code is incorrect.
640  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
641  // registers for values that are alive across basic blocks. This ensures
642  // that the values are consistently set across basic blocks, even
643  // if different instruction selection mechanisms are used (e.g., a mix of
644  // SDISel and FastISel).
645  // For values local to a basic block, the instruction selection process
646  // generates these virtual registers with whatever method is appropriate
647  // for its needs. In particular, FastISel and SDISel do not share the way
648  // local virtual registers are set.
649  // Therefore, it is impossible (or at least unsafe) to share values
650  // between basic blocks unless they use the same instruction selection
651  // method, which is not guaranteed for X86.
652  // Moreover, things like hasOneUse could not be used accurately if we
653  // allowed references to values across basic blocks when they are not
654  // actually live across basic blocks initially.
655  bool InMBB = true;
656  if (I) {
657  Opcode = I->getOpcode();
658  U = I;
659  InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
660  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
661  Opcode = C->getOpcode();
662  U = C;
663  }
664 
665  switch (Opcode) {
666  default: break;
667  case Instruction::BitCast:
668  // Look past bitcasts if its operand is in the same BB.
669  if (InMBB)
670  return X86SelectCallAddress(U->getOperand(0), AM);
671  break;
672 
673  case Instruction::IntToPtr:
674  // Look past no-op inttoptrs if its operand is in the same BB.
675  if (InMBB &&
676  TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
677  return X86SelectCallAddress(U->getOperand(0), AM);
678  break;
679 
680  case Instruction::PtrToInt:
681  // Look past no-op ptrtoints if its operand is in the same BB.
682  if (InMBB &&
683  TLI.getValueType(U->getType()) == TLI.getPointerTy())
684  return X86SelectCallAddress(U->getOperand(0), AM);
685  break;
686  }
687 
688  // Handle constant address.
689  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
690  // Can't handle alternate code models yet.
691  if (TM.getCodeModel() != CodeModel::Small)
692  return false;
693 
694  // RIP-relative addresses can't have additional register operands.
695  if (Subtarget->isPICStyleRIPRel() &&
696  (AM.Base.Reg != 0 || AM.IndexReg != 0))
697  return false;
698 
699  // Can't handle DLLImport.
700  if (GV->hasDLLImportLinkage())
701  return false;
702 
703  // Can't handle TLS.
704  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
705  if (GVar->isThreadLocal())
706  return false;
707 
708  // Okay, we've committed to selecting this global. Set up the basic address.
709  AM.GV = GV;
710 
711  // No ABI requires an extra load for anything other than DLLImport, which
712  // we rejected above. Return a direct reference to the global.
713  if (Subtarget->isPICStyleRIPRel()) {
714  // Use rip-relative addressing if we can. Above we verified that the
715  // base and index registers are unused.
716  assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
717  AM.Base.Reg = X86::RIP;
718  } else if (Subtarget->isPICStyleStubPIC()) {
719  AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
720  } else if (Subtarget->isPICStyleGOT()) {
721  AM.GVOpFlags = X86II::MO_GOTOFF;
722  }
723 
724  return true;
725  }
726 
727  // If all else fails, try to materialize the value in a register.
728  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
729  if (AM.Base.Reg == 0) {
730  AM.Base.Reg = getRegForValue(V);
731  return AM.Base.Reg != 0;
732  }
733  if (AM.IndexReg == 0) {
734  assert(AM.Scale == 1 && "Scale with no index!");
735  AM.IndexReg = getRegForValue(V);
736  return AM.IndexReg != 0;
737  }
738  }
739 
740  return false;
741 }
742 
743 
744 /// X86SelectStore - Select and emit code to implement store instructions.
745 bool X86FastISel::X86SelectStore(const Instruction *I) {
746  // Atomic stores need special handling.
747  const StoreInst *S = cast<StoreInst>(I);
748 
749  if (S->isAtomic())
750  return false;
751 
752  unsigned SABIAlignment =
753  TD.getABITypeAlignment(S->getValueOperand()->getType());
754  bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
755 
756  MVT VT;
757  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
758  return false;
759 
760  X86AddressMode AM;
761  if (!X86SelectAddress(I->getOperand(1), AM))
762  return false;
763 
764  return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
765 }
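
Editor's note on the Aligned flag computed above: a store with no explicit alignment defaults to the ABI alignment and therefore counts as aligned; only an explicit alignment smaller than the ABI alignment forces the unaligned vector opcodes (MOVUPS/MOVUPD/MOVDQU). A tiny sketch of that predicate, with an invented helper name:

// Illustrative only: mirrors the Aligned computation in X86SelectStore.
#include <cstdint>
#include <iostream>

// Returns true when the store may use the aligned vector move opcodes
// (MOVAPS/MOVAPD/MOVDQA); an alignment of 0 means "no explicit alignment".
bool mayUseAlignedStore(uint64_t ExplicitAlign, uint64_t ABIAlign) {
  return ExplicitAlign == 0 || ExplicitAlign >= ABIAlign;
}

int main() {
  std::cout << mayUseAlignedStore(0, 16) << "\n";   // 1: defaults to ABI alignment
  std::cout << mayUseAlignedStore(16, 16) << "\n";  // 1: explicitly aligned
  std::cout << mayUseAlignedStore(4, 16) << "\n";   // 0: under-aligned
}
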
766 
767 /// X86SelectRet - Select and emit code to implement ret instructions.
768 bool X86FastISel::X86SelectRet(const Instruction *I) {
769  const ReturnInst *Ret = cast<ReturnInst>(I);
770  const Function &F = *I->getParent()->getParent();
771  const X86MachineFunctionInfo *X86MFInfo =
772  FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
773 
774  if (!FuncInfo.CanLowerReturn)
775  return false;
776 
777  CallingConv::ID CC = F.getCallingConv();
778  if (CC != CallingConv::C &&
779  CC != CallingConv::Fast &&
780  CC != CallingConv::X86_FastCall &&
781  CC != CallingConv::X86_64_SysV)
782  return false;
783 
784  if (Subtarget->isCallingConvWin64(CC))
785  return false;
786 
787  // Don't handle popping bytes on return for now.
788  if (X86MFInfo->getBytesToPopOnReturn() != 0)
789  return false;
790 
791  // fastcc with -tailcallopt is intended to provide a guaranteed
792  // tail call optimization. Fastisel doesn't know how to do that.
793  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
794  return false;
795 
796  // Let SDISel handle vararg functions.
797  if (F.isVarArg())
798  return false;
799 
800  // Build a list of return value registers.
801  SmallVector<unsigned, 4> RetRegs;
802 
803  if (Ret->getNumOperands() > 0) {
804  SmallVector<ISD::OutputArg, 4> Outs;
805  GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
806 
807  // Analyze operands of the call, assigning locations to each operand.
808  SmallVector<CCValAssign, 16> ValLocs;
809  CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
810  I->getContext());
811  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
812 
813  const Value *RV = Ret->getOperand(0);
814  unsigned Reg = getRegForValue(RV);
815  if (Reg == 0)
816  return false;
817 
818  // Only handle a single return value for now.
819  if (ValLocs.size() != 1)
820  return false;
821 
822  CCValAssign &VA = ValLocs[0];
823 
824  // Don't bother handling odd stuff for now.
825  if (VA.getLocInfo() != CCValAssign::Full)
826  return false;
827  // Only handle register returns for now.
828  if (!VA.isRegLoc())
829  return false;
830 
831  // The calling-convention tables for x87 returns don't tell
832  // the whole story.
833  if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
834  return false;
835 
836  unsigned SrcReg = Reg + VA.getValNo();
837  EVT SrcVT = TLI.getValueType(RV->getType());
838  EVT DstVT = VA.getValVT();
839  // Special handling for extended integers.
840  if (SrcVT != DstVT) {
841  if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
842  return false;
843 
844  if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
845  return false;
846 
847  assert(DstVT == MVT::i32 && "X86 should always ext to i32");
848 
849  if (SrcVT == MVT::i1) {
850  if (Outs[0].Flags.isSExt())
851  return false;
852  SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
853  SrcVT = MVT::i8;
854  }
855  unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
856  ISD::SIGN_EXTEND;
857  SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
858  SrcReg, /*TODO: Kill=*/false);
859  }
860 
861  // Make the copy.
862  unsigned DstReg = VA.getLocReg();
863  const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
864  // Avoid a cross-class copy. This is very unlikely.
865  if (!SrcRC->contains(DstReg))
866  return false;
867  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
868  DstReg).addReg(SrcReg);
869 
870  // Add register to return instruction.
871  RetRegs.push_back(VA.getLocReg());
872  }
873 
874  // The x86-64 ABI for returning structs by value requires that we copy
875  // the sret argument into %rax for the return. We saved the argument into
876  // a virtual register in the entry block, so now we copy the value out
877  // and into %rax. We also do the same with %eax for Win32.
878  if (F.hasStructRetAttr() &&
879  (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
880  unsigned Reg = X86MFInfo->getSRetReturnReg();
881  assert(Reg &&
882  "SRetReturnReg should have been set in LowerFormalArguments()!");
883  unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
884  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
885  RetReg).addReg(Reg);
886  RetRegs.push_back(RetReg);
887  }
888 
889  // Now emit the RET.
890  MachineInstrBuilder MIB =
891  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
892  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
893  MIB.addReg(RetRegs[i], RegState::Implicit);
894  return true;
895 }
896 
897 /// X86SelectLoad - Select and emit code to implement load instructions.
898 ///
899 bool X86FastISel::X86SelectLoad(const Instruction *I) {
900  // Atomic loads need special handling.
901  if (cast<LoadInst>(I)->isAtomic())
902  return false;
903 
904  MVT VT;
905  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
906  return false;
907 
908  X86AddressMode AM;
909  if (!X86SelectAddress(I->getOperand(0), AM))
910  return false;
911 
912  unsigned ResultReg = 0;
913  if (X86FastEmitLoad(VT, AM, ResultReg)) {
914  UpdateValueMap(I, ResultReg);
915  return true;
916  }
917  return false;
918 }
919 
920 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
921  bool HasAVX = Subtarget->hasAVX();
922  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
923  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
924 
925  switch (VT.getSimpleVT().SimpleTy) {
926  default: return 0;
927  case MVT::i8: return X86::CMP8rr;
928  case MVT::i16: return X86::CMP16rr;
929  case MVT::i32: return X86::CMP32rr;
930  case MVT::i64: return X86::CMP64rr;
931  case MVT::f32:
932  return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
933  case MVT::f64:
934  return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
935  }
936 }
937 
938 /// X86ChooseCmpImmediateOpcode - If the RHS of the comparison is the constant
939 /// RHSC, return an opcode that can fold the immediate into the compare (e.g.
940 /// CMP32ri); otherwise return 0.
941 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
942  switch (VT.getSimpleVT().SimpleTy) {
943  // Otherwise, we can't fold the immediate into this comparison.
944  default: return 0;
945  case MVT::i8: return X86::CMP8ri;
946  case MVT::i16: return X86::CMP16ri;
947  case MVT::i32: return X86::CMP32ri;
948  case MVT::i64:
949  // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
950  // field.
951  if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
952  return X86::CMP64ri32;
953  return 0;
954  }
955 }
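
Editor's note: the i64 case above relies on CMP64ri32 sign-extending a 32-bit immediate to 64 bits, so it is only usable when the constant survives that round trip. A standalone sketch of the check (the helper name is invented):

// Illustrative check corresponding to
// `(int)RHSC->getSExtValue() == RHSC->getSExtValue()` above: the 64-bit
// constant must be representable as a sign-extended 32-bit immediate.
#include <cstdint>
#include <iostream>

bool fitsInSExtImm32(int64_t V) {
  return V >= INT32_MIN && V <= INT32_MAX;
}

int main() {
  std::cout << fitsInSExtImm32(-1) << "\n";            // 1: sign-extends cleanly
  std::cout << fitsInSExtImm32(0x7FFFFFFF) << "\n";    // 1: largest positive imm32
  std::cout << fitsInSExtImm32(0x80000000LL) << "\n";  // 0: needs CMP64rr instead
}
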
956 
957 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
958  EVT VT) {
959  unsigned Op0Reg = getRegForValue(Op0);
960  if (Op0Reg == 0) return false;
961 
962  // Handle 'null' like i32/i64 0.
963  if (isa<ConstantPointerNull>(Op1))
964  Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
965 
966  // We have two options: compare with register or immediate. If the RHS of
967  // the compare is an immediate that we can fold into this compare, use
968  // CMPri, otherwise use CMPrr.
969  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
970  if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
971  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
972  .addReg(Op0Reg)
973  .addImm(Op1C->getSExtValue());
974  return true;
975  }
976  }
977 
978  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
979  if (CompareOpc == 0) return false;
980 
981  unsigned Op1Reg = getRegForValue(Op1);
982  if (Op1Reg == 0) return false;
983  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
984  .addReg(Op0Reg)
985  .addReg(Op1Reg);
986 
987  return true;
988 }
989 
990 bool X86FastISel::X86SelectCmp(const Instruction *I) {
991  const CmpInst *CI = cast<CmpInst>(I);
992 
993  MVT VT;
994  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
995  return false;
996 
997  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
998  unsigned SetCCOpc;
999  bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
1000  switch (CI->getPredicate()) {
1001  case CmpInst::FCMP_OEQ: {
1002  if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
1003  return false;
1004 
1005  unsigned EReg = createResultReg(&X86::GR8RegClass);
1006  unsigned NPReg = createResultReg(&X86::GR8RegClass);
1007  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
1008  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1009  TII.get(X86::SETNPr), NPReg);
1010  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1011  TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
1012  UpdateValueMap(I, ResultReg);
1013  return true;
1014  }
1015  case CmpInst::FCMP_UNE: {
1016  if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
1017  return false;
1018 
1019  unsigned NEReg = createResultReg(&X86::GR8RegClass);
1020  unsigned PReg = createResultReg(&X86::GR8RegClass);
1021  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
1022  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
1023  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
1024  .addReg(PReg).addReg(NEReg);
1025  UpdateValueMap(I, ResultReg);
1026  return true;
1027  }
1028  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
1029  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
1030  case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
1031  case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
1032  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
1033  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
1034  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
1035  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
1036  case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
1037  case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
1038  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
1039  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
1040 
1041  case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
1042  case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
1043  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
1044  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
1045  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
1046  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
1047  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
1048  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
1049  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
1050  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
1051  default:
1052  return false;
1053  }
1054 
1055  const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1056  if (SwapArgs)
1057  std::swap(Op0, Op1);
1058 
1059  // Emit a compare of Op0/Op1.
1060  if (!X86FastEmitCompare(Op0, Op1, VT))
1061  return false;
1062 
1063  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
1064  UpdateValueMap(I, ResultReg);
1065  return true;
1066 }
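
Editor's note: the special casing of FCMP_OEQ and FCMP_UNE above follows from how UCOMISS/UCOMISD report an unordered result: PF is set when either operand is a NaN, and ZF alone cannot distinguish "equal" from "unordered". A minimal sketch of the same logic in plain C++ (purely illustrative, not LLVM API):

// Models the flag combinations used above: OEQ = SETE & SETNP,
// UNE = SETNE | SETP.
#include <cmath>
#include <iostream>

struct Flags { bool ZF, PF; };  // as set by UCOMISS/UCOMISD

Flags ucomis(double a, double b) {
  if (std::isnan(a) || std::isnan(b))
    return {true, true};   // unordered: ZF=1, PF=1 (and CF=1)
  return {a == b, false};
}

bool fcmp_oeq(double a, double b) {   // ordered and equal
  Flags F = ucomis(a, b);
  return F.ZF && !F.PF;               // SETE AND SETNP
}

bool fcmp_une(double a, double b) {   // unordered or not equal
  Flags F = ucomis(a, b);
  return !F.ZF || F.PF;               // SETNE OR SETP
}

int main() {
  double nan = std::nan("");
  std::cout << fcmp_oeq(1.0, 1.0) << fcmp_oeq(1.0, nan) << "\n";  // prints 10
  std::cout << fcmp_une(1.0, 2.0) << fcmp_une(nan, nan) << "\n";  // prints 11
}
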
1067 
1068 bool X86FastISel::X86SelectZExt(const Instruction *I) {
1069  EVT DstVT = TLI.getValueType(I->getType());
1070  if (!TLI.isTypeLegal(DstVT))
1071  return false;
1072 
1073  unsigned ResultReg = getRegForValue(I->getOperand(0));
1074  if (ResultReg == 0)
1075  return false;
1076 
1077  // Handle zero-extension from i1 to i8, which is common.
1078  MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
1079  if (SrcVT.SimpleTy == MVT::i1) {
1080  // Set the high bits to zero.
1081  ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1082  SrcVT = MVT::i8;
1083 
1084  if (ResultReg == 0)
1085  return false;
1086  }
1087 
1088  if (DstVT == MVT::i64) {
1089  // Handle extension to 64-bits via sub-register shenanigans.
1090  unsigned MovInst;
1091 
1092  switch (SrcVT.SimpleTy) {
1093  case MVT::i8: MovInst = X86::MOVZX32rr8; break;
1094  case MVT::i16: MovInst = X86::MOVZX32rr16; break;
1095  case MVT::i32: MovInst = X86::MOV32rr; break;
1096  default: llvm_unreachable("Unexpected zext to i64 source type");
1097  }
1098 
1099  unsigned Result32 = createResultReg(&X86::GR32RegClass);
1100  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovInst), Result32)
1101  .addReg(ResultReg);
1102 
1103  ResultReg = createResultReg(&X86::GR64RegClass);
1104  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::SUBREG_TO_REG),
1105  ResultReg)
1106  .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1107  } else if (DstVT != MVT::i8) {
1108  ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1109  ResultReg, /*Kill=*/true);
1110  if (ResultReg == 0)
1111  return false;
1112  }
1113 
1114  UpdateValueMap(I, ResultReg);
1115  return true;
1116 }
1117 
1118 
1119 bool X86FastISel::X86SelectBranch(const Instruction *I) {
1120  // Unconditional branches are selected by tablegen-generated code.
1121  // Handle a conditional branch.
1122  const BranchInst *BI = cast<BranchInst>(I);
1123  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1124  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1125 
1126  // Fold the common case of a conditional branch with a comparison
1127  // in the same block (values defined on other blocks may not have
1128  // initialized registers).
1129  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1130  if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
1131  EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
1132 
1133  // Try to take advantage of fallthrough opportunities.
1134  CmpInst::Predicate Predicate = CI->getPredicate();
1135  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1136  std::swap(TrueMBB, FalseMBB);
1137  Predicate = CmpInst::getInversePredicate(Predicate);
1138  }
1139 
1140  bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
1141  unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
1142 
1143  switch (Predicate) {
1144  case CmpInst::FCMP_OEQ:
1145  std::swap(TrueMBB, FalseMBB);
1146  Predicate = CmpInst::FCMP_UNE;
1147  // FALL THROUGH
1148  case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
1149  case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
1150  case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
1151  case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break;
1152  case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break;
1153  case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
1154  case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
1155  case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break;
1156  case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
1157  case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break;
1158  case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
1159  case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
1160  case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
1161 
1162  case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
1163  case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
1164  case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
1165  case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
1166  case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
1167  case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
1168  case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break;
1169  case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
1170  case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break;
1171  case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
1172  default:
1173  return false;
1174  }
1175 
1176  const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1177  if (SwapArgs)
1178  std::swap(Op0, Op1);
1179 
1180  // Emit a compare of the LHS and RHS, setting the flags.
1181  if (!X86FastEmitCompare(Op0, Op1, VT))
1182  return false;
1183 
1184  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
1185  .addMBB(TrueMBB);
1186 
1187  if (Predicate == CmpInst::FCMP_UNE) {
1188  // X86 requires a second branch to handle UNE (and OEQ,
1189  // which is mapped to UNE above).
1190  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
1191  .addMBB(TrueMBB);
1192  }
1193 
1194  FastEmitBranch(FalseMBB, DL);
1195  FuncInfo.MBB->addSuccessor(TrueMBB);
1196  return true;
1197  }
1198  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1199  // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1200  // typically happen for _Bool and C++ bools.
1201  MVT SourceVT;
1202  if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1203  isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1204  unsigned TestOpc = 0;
1205  switch (SourceVT.SimpleTy) {
1206  default: break;
1207  case MVT::i8: TestOpc = X86::TEST8ri; break;
1208  case MVT::i16: TestOpc = X86::TEST16ri; break;
1209  case MVT::i32: TestOpc = X86::TEST32ri; break;
1210  case MVT::i64: TestOpc = X86::TEST64ri32; break;
1211  }
1212  if (TestOpc) {
1213  unsigned OpReg = getRegForValue(TI->getOperand(0));
1214  if (OpReg == 0) return false;
1215  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
1216  .addReg(OpReg).addImm(1);
1217 
1218  unsigned JmpOpc = X86::JNE_4;
1219  if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1220  std::swap(TrueMBB, FalseMBB);
1221  JmpOpc = X86::JE_4;
1222  }
1223 
1224  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
1225  .addMBB(TrueMBB);
1226  FastEmitBranch(FalseMBB, DL);
1227  FuncInfo.MBB->addSuccessor(TrueMBB);
1228  return true;
1229  }
1230  }
1231  }
1232 
1233  // Otherwise do a clumsy setcc and re-test it.
1234  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1235  // in an explicit cast, so make sure to handle that correctly.
1236  unsigned OpReg = getRegForValue(BI->getCondition());
1237  if (OpReg == 0) return false;
1238 
1239  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
1240  .addReg(OpReg).addImm(1);
1241  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
1242  .addMBB(TrueMBB);
1243  FastEmitBranch(FalseMBB, DL);
1244  FuncInfo.MBB->addSuccessor(TrueMBB);
1245  return true;
1246 }
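
Editor's note: the fallback path above tests the condition register against the immediate 1 rather than comparing the whole byte because an i1 that was ANY_EXTENDed to i8 may carry garbage in bits 1-7; only bit 0 is meaningful. A tiny sketch of that convention (illustrative name):

// Why `TEST8ri reg, 1` is used: only the low bit of the i8 holding an
// i1 is defined, so the branch must ignore the upper seven bits.
#include <cstdint>
#include <iostream>

bool i1Truth(uint8_t CondByte) {
  return (CondByte & 1) != 0;   // TEST8ri ..., 1 ; JNE
}

int main() {
  std::cout << i1Truth(0x01) << "\n";  // 1: true
  std::cout << i1Truth(0xFE) << "\n";  // 0: low bit clear, upper bits are garbage
}
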
1247 
1248 bool X86FastISel::X86SelectShift(const Instruction *I) {
1249  unsigned CReg = 0, OpReg = 0;
1250  const TargetRegisterClass *RC = NULL;
1251  if (I->getType()->isIntegerTy(8)) {
1252  CReg = X86::CL;
1253  RC = &X86::GR8RegClass;
1254  switch (I->getOpcode()) {
1255  case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1256  case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1257  case Instruction::Shl: OpReg = X86::SHL8rCL; break;
1258  default: return false;
1259  }
1260  } else if (I->getType()->isIntegerTy(16)) {
1261  CReg = X86::CX;
1262  RC = &X86::GR16RegClass;
1263  switch (I->getOpcode()) {
1264  case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1265  case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1266  case Instruction::Shl: OpReg = X86::SHL16rCL; break;
1267  default: return false;
1268  }
1269  } else if (I->getType()->isIntegerTy(32)) {
1270  CReg = X86::ECX;
1271  RC = &X86::GR32RegClass;
1272  switch (I->getOpcode()) {
1273  case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1274  case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1275  case Instruction::Shl: OpReg = X86::SHL32rCL; break;
1276  default: return false;
1277  }
1278  } else if (I->getType()->isIntegerTy(64)) {
1279  CReg = X86::RCX;
1280  RC = &X86::GR64RegClass;
1281  switch (I->getOpcode()) {
1282  case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1283  case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1284  case Instruction::Shl: OpReg = X86::SHL64rCL; break;
1285  default: return false;
1286  }
1287  } else {
1288  return false;
1289  }
1290 
1291  MVT VT;
1292  if (!isTypeLegal(I->getType(), VT))
1293  return false;
1294 
1295  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1296  if (Op0Reg == 0) return false;
1297 
1298  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1299  if (Op1Reg == 0) return false;
1300  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1301  CReg).addReg(Op1Reg);
1302 
1303  // The shift instruction uses X86::CL. If we defined a super-register
1304  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1305  if (CReg != X86::CL)
1306  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1307  TII.get(TargetOpcode::KILL), X86::CL)
1308  .addReg(CReg, RegState::Kill);
1309 
1310  unsigned ResultReg = createResultReg(RC);
1311  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
1312  .addReg(Op0Reg);
1313  UpdateValueMap(I, ResultReg);
1314  return true;
1315 }
1316 
1317 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
1318  const static unsigned NumTypes = 4; // i8, i16, i32, i64
1319  const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
1320  const static bool S = true; // IsSigned
1321  const static bool U = false; // !IsSigned
1322  const static unsigned Copy = TargetOpcode::COPY;
1323  // For the X86 DIV/IDIV instruction, in most cases the dividend
1324  // (numerator) must be in a specific register pair highreg:lowreg,
1325  // producing the quotient in lowreg and the remainder in highreg.
1326  // For most data types, to set up the instruction, the dividend is
1327  // copied into lowreg, and lowreg is sign-extended or zero-extended
1328  // into highreg. The exception is i8, where the dividend is defined
1329  // as a single register rather than a register pair, and we
1330  // therefore directly sign-extend or zero-extend the dividend into
1331  // lowreg, instead of copying, and ignore the highreg.
1332  const static struct DivRemEntry {
1333  // The following portion depends only on the data type.
1334  const TargetRegisterClass *RC;
1335  unsigned LowInReg; // low part of the register pair
1336  unsigned HighInReg; // high part of the register pair
1337  // The following portion depends on both the data type and the operation.
1338  struct DivRemResult {
1339  unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
1340  unsigned OpSignExtend; // Opcode for sign-extending lowreg into
1341  // highreg, or copying a zero into highreg.
1342  unsigned OpCopy; // Opcode for copying dividend into lowreg, or
1343  // zero/sign-extending into lowreg for i8.
1344  unsigned DivRemResultReg; // Register containing the desired result.
1345  bool IsOpSigned; // Whether to use signed or unsigned form.
1346  } ResultTable[NumOps];
1347  } OpTable[NumTypes] = {
1348  { &X86::GR8RegClass, X86::AX, 0, {
1349  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
1350  { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
1351  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
1352  { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
1353  }
1354  }, // i8
1355  { &X86::GR16RegClass, X86::AX, X86::DX, {
1356  { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
1357  { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
1358  { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
1359  { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
1360  }
1361  }, // i16
1362  { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1363  { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
1364  { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
1365  { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
1366  { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
1367  }
1368  }, // i32
1369  { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1370  { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
1371  { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
1372  { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
1373  { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
1374  }
1375  }, // i64
1376  };
1377 
1378  MVT VT;
1379  if (!isTypeLegal(I->getType(), VT))
1380  return false;
1381 
1382  unsigned TypeIndex, OpIndex;
1383  switch (VT.SimpleTy) {
1384  default: return false;
1385  case MVT::i8: TypeIndex = 0; break;
1386  case MVT::i16: TypeIndex = 1; break;
1387  case MVT::i32: TypeIndex = 2; break;
1388  case MVT::i64: TypeIndex = 3;
1389  if (!Subtarget->is64Bit())
1390  return false;
1391  break;
1392  }
1393 
1394  switch (I->getOpcode()) {
1395  default: llvm_unreachable("Unexpected div/rem opcode");
1396  case Instruction::SDiv: OpIndex = 0; break;
1397  case Instruction::SRem: OpIndex = 1; break;
1398  case Instruction::UDiv: OpIndex = 2; break;
1399  case Instruction::URem: OpIndex = 3; break;
1400  }
1401 
1402  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1403  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1404  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1405  if (Op0Reg == 0)
1406  return false;
1407  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1408  if (Op1Reg == 0)
1409  return false;
1410 
1411  // Move op0 into low-order input register.
1412  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1413  TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1414  // Zero-extend or sign-extend into high-order input register.
1415  if (OpEntry.OpSignExtend) {
1416  if (OpEntry.IsOpSigned)
1417  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1418  TII.get(OpEntry.OpSignExtend));
1419  else {
1420  unsigned Zero32 = createResultReg(&X86::GR32RegClass);
1421  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1422  TII.get(X86::MOV32r0), Zero32);
1423 
1424  // Copy the zero into the appropriate sub/super/identical physical
1425  // register. Unfortunately the operations needed are not uniform enough to
1426  // fit neatly into the table above.
1427  if (VT.SimpleTy == MVT::i16) {
1428  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1429  TII.get(Copy), TypeEntry.HighInReg)
1430  .addReg(Zero32, 0, X86::sub_16bit);
1431  } else if (VT.SimpleTy == MVT::i32) {
1432  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1433  TII.get(Copy), TypeEntry.HighInReg)
1434  .addReg(Zero32);
1435  } else if (VT.SimpleTy == MVT::i64) {
1436  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1437  TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1438  .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1439  }
1440  }
1441  }
1442  // Generate the DIV/IDIV instruction.
1443  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1444  TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1445  // For i8 remainder, we can't reference AH directly, as we'll end
1446  // up with bogus copies like %R9B = COPY %AH. Reference AX
1447  // instead to prevent AH references in a REX instruction.
1448  //
1449  // The current assumption of the fast register allocator is that isel
1450  // won't generate explicit references to the GPR8_NOREX registers. If
1451  // the allocator and/or the backend get enhanced to be more robust in
1452  // that regard, this can be, and should be, removed.
1453  unsigned ResultReg = 0;
1454  if ((I->getOpcode() == Instruction::SRem ||
1455  I->getOpcode() == Instruction::URem) &&
1456  OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1457  unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1458  unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1459  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1460  TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1461 
1462  // Shift AX right by 8 bits instead of using AH.
1463  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SHR16ri),
1464  ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1465 
1466  // Now reference the 8-bit subreg of the result.
1467  ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
1468  /*Kill=*/true, X86::sub_8bit);
1469  }
1470  // Copy the result out of the physreg if we haven't already.
1471  if (!ResultReg) {
1472  ResultReg = createResultReg(TypeEntry.RC);
1473  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Copy), ResultReg)
1474  .addReg(OpEntry.DivRemResultReg);
1475  }
1476  UpdateValueMap(I, ResultReg);
1477 
1478  return true;
1479 }
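
Editor's note: to see why the i8 remainder path above shifts AX right by 8 instead of copying AH: in 64-bit mode, an instruction that needs a REX prefix cannot encode AH/BH/CH/DH, so a copy from AH into a register like R9B would be unencodable. Extracting the high byte arithmetically avoids that. A standalone sketch (illustrative only):

// After DIV8r/IDIV8r, AL holds the quotient and AH the remainder.
// Modeling AX as a 16-bit value, the remainder can be recovered with a
// 16-bit shift (SHR16ri ax, 8) instead of referencing AH directly.
#include <cstdint>
#include <iostream>

uint8_t remainderFromAX(uint16_t AX) {
  return uint8_t(AX >> 8);   // SHR16ri + sub_8bit extract
}

int main() {
  // 47 / 5: quotient 9 in AL, remainder 2 in AH -> AX = 0x0209.
  uint16_t AX = (2 << 8) | 9;
  std::cout << unsigned(remainderFromAX(AX)) << "\n";  // prints 2
}
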
1480 
1481 bool X86FastISel::X86SelectSelect(const Instruction *I) {
1482  MVT VT;
1483  if (!isTypeLegal(I->getType(), VT))
1484  return false;
1485 
1486  // We only use cmov here, if we don't have a cmov instruction bail.
1487  if (!Subtarget->hasCMov()) return false;
1488 
1489  unsigned Opc = 0;
1490  const TargetRegisterClass *RC = NULL;
1491  if (VT == MVT::i16) {
1492  Opc = X86::CMOVE16rr;
1493  RC = &X86::GR16RegClass;
1494  } else if (VT == MVT::i32) {
1495  Opc = X86::CMOVE32rr;
1496  RC = &X86::GR32RegClass;
1497  } else if (VT == MVT::i64) {
1498  Opc = X86::CMOVE64rr;
1499  RC = &X86::GR64RegClass;
1500  } else {
1501  return false;
1502  }
1503 
1504  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1505  if (Op0Reg == 0) return false;
1506  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1507  if (Op1Reg == 0) return false;
1508  unsigned Op2Reg = getRegForValue(I->getOperand(2));
1509  if (Op2Reg == 0) return false;
1510 
1511  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
1512  .addReg(Op0Reg).addReg(Op0Reg);
1513  unsigned ResultReg = createResultReg(RC);
1514  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
1515  .addReg(Op1Reg).addReg(Op2Reg);
1516  UpdateValueMap(I, ResultReg);
1517  return true;
1518 }
1519 
1520 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
1521  // fpext from float to double.
1522  if (X86ScalarSSEf64 &&
1523  I->getType()->isDoubleTy()) {
1524  const Value *V = I->getOperand(0);
1525  if (V->getType()->isFloatTy()) {
1526  unsigned OpReg = getRegForValue(V);
1527  if (OpReg == 0) return false;
1528  unsigned ResultReg = createResultReg(&X86::FR64RegClass);
1529  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1530  TII.get(X86::CVTSS2SDrr), ResultReg)
1531  .addReg(OpReg);
1532  UpdateValueMap(I, ResultReg);
1533  return true;
1534  }
1535  }
1536 
1537  return false;
1538 }
1539 
1540 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
1541  if (X86ScalarSSEf64) {
1542  if (I->getType()->isFloatTy()) {
1543  const Value *V = I->getOperand(0);
1544  if (V->getType()->isDoubleTy()) {
1545  unsigned OpReg = getRegForValue(V);
1546  if (OpReg == 0) return false;
1547  unsigned ResultReg = createResultReg(&X86::FR32RegClass);
1548  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1549  TII.get(X86::CVTSD2SSrr), ResultReg)
1550  .addReg(OpReg);
1551  UpdateValueMap(I, ResultReg);
1552  return true;
1553  }
1554  }
1555  }
1556 
1557  return false;
1558 }
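
For reference, both conversions above are the plain scalar float/double casts; CVTSS2SDrr and CVTSD2SSrr are their single-instruction SSE2 forms. A trivial standalone illustration:

    #include <cassert>

    int main() {
      float F = 1.5f;
      double D = static_cast<double>(F);   // fpext,   cf. CVTSS2SDrr
      float Back = static_cast<float>(D);  // fptrunc, cf. CVTSD2SSrr
      assert(Back == F);                   // 1.5 is exactly representable
      return 0;
    }
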
1559 
1560 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
1561  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1562  EVT DstVT = TLI.getValueType(I->getType());
1563 
1564  // This code only handles truncation to byte.
1565  if (DstVT != MVT::i8 && DstVT != MVT::i1)
1566  return false;
1567  if (!TLI.isTypeLegal(SrcVT))
1568  return false;
1569 
1570  unsigned InputReg = getRegForValue(I->getOperand(0));
1571  if (!InputReg)
1572  // Unhandled operand. Halt "fast" selection and bail.
1573  return false;
1574 
1575  if (SrcVT == MVT::i8) {
1576  // Truncate from i8 to i1; no code needed.
1577  UpdateValueMap(I, InputReg);
1578  return true;
1579  }
1580 
1581  if (!Subtarget->is64Bit()) {
1582  // If we're on x86-32, we can't extract an i8 from a general register.
1583  // First issue a copy to GR16_ABCD or GR32_ABCD.
1584  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
1585  (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
1586  (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
1587  unsigned CopyReg = createResultReg(CopyRC);
1588  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1589  CopyReg).addReg(InputReg);
1590  InputReg = CopyReg;
1591  }
1592 
1593  // Issue an extract_subreg.
1594  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
1595  InputReg, /*Kill=*/true,
1596  X86::sub_8bit);
1597  if (!ResultReg)
1598  return false;
1599 
1600  UpdateValueMap(I, ResultReg);
1601  return true;
1602 }
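
A standalone sketch of what the sub_8bit extract models: truncation to i8 is just a view of the low byte, no arithmetic needed. The extra copy into GR16_ABCD/GR32_ABCD on x86-32 exists because only the A/B/C/D registers have addressable low-byte subregisters there.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Wide = 0x12345678u;
      // The sub_8bit subregister is simply the low byte of the wider value,
      // which is also why the i8 -> i1 case above needs no code at all.
      uint8_t Narrow = static_cast<uint8_t>(Wide);
      assert(Narrow == 0x78);
      return 0;
    }
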
1603 
1604 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
1605  return Len <= (Subtarget->is64Bit() ? 32 : 16);
1606 }
1607 
1608 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
1609  X86AddressMode SrcAM, uint64_t Len) {
1610 
1611  // Make sure we don't bloat code by inlining very large memcpy's.
1612  if (!IsMemcpySmall(Len))
1613  return false;
1614 
1615  bool i64Legal = Subtarget->is64Bit();
1616 
1617  // We don't care about alignment here since we just emit integer accesses.
1618  while (Len) {
1619  MVT VT;
1620  if (Len >= 8 && i64Legal)
1621  VT = MVT::i64;
1622  else if (Len >= 4)
1623  VT = MVT::i32;
1624  else if (Len >= 2)
1625  VT = MVT::i16;
1626  else {
1627  VT = MVT::i8;
1628  }
1629 
1630  unsigned Reg;
1631  bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
1632  RV &= X86FastEmitStore(VT, Reg, DestAM);
1633  assert(RV && "Failed to emit load or store??");
1634 
1635  unsigned Size = VT.getSizeInBits()/8;
1636  Len -= Size;
1637  DestAM.Disp += Size;
1638  SrcAM.Disp += Size;
1639  }
1640 
1641  return true;
1642 }
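
A standalone sketch of the chunking policy in the loop above: cover Len bytes with the widest available integer access first (8-byte accesses only when i64 is legal, i.e. x86-64), then fall back to 4, 2, and 1.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Returns the access sizes the loop above would use to cover Len bytes.
    static std::vector<unsigned> planChunks(uint64_t Len, bool Is64Bit) {
      std::vector<unsigned> Sizes;
      while (Len) {
        unsigned Size;
        if (Len >= 8 && Is64Bit)
          Size = 8;       // MVT::i64
        else if (Len >= 4)
          Size = 4;       // MVT::i32
        else if (Len >= 2)
          Size = 2;       // MVT::i16
        else
          Size = 1;       // MVT::i8
        Sizes.push_back(Size); // one load/store pair per chunk
        Len -= Size;
      }
      return Sizes;
    }

    int main() {
      const std::vector<unsigned> Expected = {8, 4, 2, 1};
      assert(planChunks(15, /*Is64Bit=*/true) == Expected);
      assert(planChunks(3, /*Is64Bit=*/false) == (std::vector<unsigned>{2, 1}));
      return 0;
    }
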
1643 
1644 bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
1645  // FIXME: Handle more intrinsics.
1646  switch (I.getIntrinsicID()) {
1647  default: return false;
1648  case Intrinsic::memcpy: {
1649  const MemCpyInst &MCI = cast<MemCpyInst>(I);
1650  // Don't handle volatile or variable length memcpys.
1651  if (MCI.isVolatile())
1652  return false;
1653 
1654  if (isa<ConstantInt>(MCI.getLength())) {
1655  // Small memcpy's are common enough that we want to do them
1656  // without a call if possible.
1657  uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
1658  if (IsMemcpySmall(Len)) {
1659  X86AddressMode DestAM, SrcAM;
1660  if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
1661  !X86SelectAddress(MCI.getRawSource(), SrcAM))
1662  return false;
1663  TryEmitSmallMemcpy(DestAM, SrcAM, Len);
1664  return true;
1665  }
1666  }
1667 
1668  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
1669  if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
1670  return false;
1671 
1672  if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
1673  return false;
1674 
1675  return DoSelectCall(&I, "memcpy");
1676  }
1677  case Intrinsic::memset: {
1678  const MemSetInst &MSI = cast<MemSetInst>(I);
1679 
1680  if (MSI.isVolatile())
1681  return false;
1682 
1683  unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
1684  if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
1685  return false;
1686 
1687  if (MSI.getDestAddressSpace() > 255)
1688  return false;
1689 
1690  return DoSelectCall(&I, "memset");
1691  }
1692  case Intrinsic::stackprotector: {
1693  // Emit code to store the stack guard onto the stack.
1694  EVT PtrTy = TLI.getPointerTy();
1695 
1696  const Value *Op1 = I.getArgOperand(0); // The guard's value.
1697  const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
1698 
1699  // Grab the frame index.
1700  X86AddressMode AM;
1701  if (!X86SelectAddress(Slot, AM)) return false;
1702  if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
1703  return true;
1704  }
1705  case Intrinsic::dbg_declare: {
1706  const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
1707  X86AddressMode AM;
1708  assert(DI->getAddress() && "Null address should be checked earlier!");
1709  if (!X86SelectAddress(DI->getAddress(), AM))
1710  return false;
1711  const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
1712  // FIXME may need to add RegState::Debug to any registers produced,
1713  // although ESP/EBP should be the only ones at the moment.
1714  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
1715  addImm(0).addMetadata(DI->getVariable());
1716  return true;
1717  }
1718  case Intrinsic::trap: {
1719  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
1720  return true;
1721  }
1722  case Intrinsic::sadd_with_overflow:
1723  case Intrinsic::uadd_with_overflow: {
1724  // FIXME: Should fold immediates.
1725 
1726  // Replace "add with overflow" intrinsics with an "add" instruction followed
1727  // by a seto/setc instruction.
1728  const Function *Callee = I.getCalledFunction();
1729  Type *RetTy =
1730  cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
1731 
1732  MVT VT;
1733  if (!isTypeLegal(RetTy, VT))
1734  return false;
1735 
1736  const Value *Op1 = I.getArgOperand(0);
1737  const Value *Op2 = I.getArgOperand(1);
1738  unsigned Reg1 = getRegForValue(Op1);
1739  unsigned Reg2 = getRegForValue(Op2);
1740 
1741  if (Reg1 == 0 || Reg2 == 0)
1742  // FIXME: Handle values *not* in registers.
1743  return false;
1744 
1745  unsigned OpC = 0;
1746  if (VT == MVT::i32)
1747  OpC = X86::ADD32rr;
1748  else if (VT == MVT::i64)
1749  OpC = X86::ADD64rr;
1750  else
1751  return false;
1752 
1753  // The call to CreateRegs builds two sequential registers, to store
1754  // both of the returned values.
1755  unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
1756  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
1757  .addReg(Reg1).addReg(Reg2);
1758 
1759  unsigned Opc = X86::SETBr;
1760  if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
1761  Opc = X86::SETOr;
1762  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
1763 
1764  UpdateValueMap(&I, ResultReg, 2);
1765  return true;
1766  }
1767  }
1768 }
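
A standalone sketch of the value pair the *.with.overflow lowering above produces: the ADD result, plus the flag that SETB (carry, unsigned wrap) or SETO (signed overflow) reads back into the second register.

    #include <cassert>
    #include <cstdint>
    #include <utility>

    static std::pair<uint32_t, bool> uaddWithOverflow(uint32_t A, uint32_t B) {
      uint32_t Sum = A + B;
      bool CF = Sum < A;                   // what SETB reads back after the ADD
      return std::make_pair(Sum, CF);
    }

    static std::pair<int32_t, bool> saddWithOverflow(int32_t A, int32_t B) {
      int64_t Wide = static_cast<int64_t>(A) + static_cast<int64_t>(B);
      bool OF = Wide > INT32_MAX || Wide < INT32_MIN; // what SETO reads back
      // Two's-complement wrap of the low 32 bits, as the machine ADD produces.
      int32_t Truncated = static_cast<int32_t>(static_cast<uint32_t>(Wide));
      return std::make_pair(Truncated, OF);
    }

    int main() {
      assert(uaddWithOverflow(0xFFFFFFFFu, 1u).second);  // wraps, CF set
      assert(!uaddWithOverflow(1u, 2u).second);
      assert(saddWithOverflow(INT32_MAX, 1).second);     // overflows, OF set
      assert(!saddWithOverflow(-1, 1).second);
      return 0;
    }
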
1769 
1770 bool X86FastISel::FastLowerArguments() {
1771  if (!FuncInfo.CanLowerReturn)
1772  return false;
1773 
1774  const Function *F = FuncInfo.Fn;
1775  if (F->isVarArg())
1776  return false;
1777 
1778  CallingConv::ID CC = F->getCallingConv();
1779  if (CC != CallingConv::C)
1780  return false;
1781 
1782  if (Subtarget->isCallingConvWin64(CC))
1783  return false;
1784 
1785  if (!Subtarget->is64Bit())
1786  return false;
1787 
1788  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
1789  unsigned Idx = 1;
1790  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
1791  I != E; ++I, ++Idx) {
1792  if (Idx > 6)
1793  return false;
1794 
1795  if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
1796  F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
1797  F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
1798  F->getAttributes().hasAttribute(Idx, Attribute::Nest))
1799  return false;
1800 
1801  Type *ArgTy = I->getType();
1802  if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
1803  return false;
1804 
1805  EVT ArgVT = TLI.getValueType(ArgTy);
1806  if (!ArgVT.isSimple()) return false;
1807  switch (ArgVT.getSimpleVT().SimpleTy) {
1808  case MVT::i32:
1809  case MVT::i64:
1810  break;
1811  default:
1812  return false;
1813  }
1814  }
1815 
1816  static const uint16_t GPR32ArgRegs[] = {
1817  X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
1818  };
1819  static const uint16_t GPR64ArgRegs[] = {
1820  X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
1821  };
1822 
1823  Idx = 0;
1824  const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
1825  const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
1826  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
1827  I != E; ++I, ++Idx) {
1828  bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
1829  const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
1830  unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
1831  unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
1832  // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
1833  // Without this, EmitLiveInCopies may eliminate the livein if its only
1834  // use is a bitcast (which isn't turned into an instruction).
1835  unsigned ResultReg = createResultReg(RC);
1836  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1837  ResultReg).addReg(DstReg, getKillRegState(true));
1838  UpdateValueMap(I, ResultReg);
1839  }
1840  return true;
1841 }
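
As a quick standalone illustration of the register tables above: the first six integer arguments under the SysV x86-64 C convention land in fixed registers, and an i32 argument simply uses the 32-bit view of the same slot.

    #include <cassert>
    #include <cstdio>

    static const char *argRegister(unsigned Idx, bool Is32Bit) {
      static const char *const GPR32[] = {"EDI", "ESI", "EDX", "ECX", "R8D", "R9D"};
      static const char *const GPR64[] = {"RDI", "RSI", "RDX", "RCX", "R8",  "R9"};
      assert(Idx < 6 && "only the first six scalar arguments are handled");
      return Is32Bit ? GPR32[Idx] : GPR64[Idx];
    }

    int main() {
      std::printf("arg0 (i64) -> %s\n", argRegister(0, /*Is32Bit=*/false)); // RDI
      std::printf("arg1 (i32) -> %s\n", argRegister(1, /*Is32Bit=*/true));  // ESI
      return 0;
    }
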
1842 
1843 bool X86FastISel::X86SelectCall(const Instruction *I) {
1844  const CallInst *CI = cast<CallInst>(I);
1845  const Value *Callee = CI->getCalledValue();
1846 
1847  // Can't handle inline asm yet.
1848  if (isa<InlineAsm>(Callee))
1849  return false;
1850 
1851  // Handle intrinsic calls.
1852  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
1853  return X86VisitIntrinsicCall(*II);
1854 
1855  // Allow SelectionDAG isel to handle tail calls.
1856  if (cast<CallInst>(I)->isTailCall())
1857  return false;
1858 
1859  return DoSelectCall(I, 0);
1860 }
1861 
1862 static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
1863  const ImmutableCallSite &CS) {
1864  if (Subtarget.is64Bit())
1865  return 0;
1866  if (Subtarget.isTargetWindows())
1867  return 0;
1868  CallingConv::ID CC = CS.getCallingConv();
1869  if (CC == CallingConv::Fast || CC == CallingConv::GHC)
1870  return 0;
1871  if (!CS.paramHasAttr(1, Attribute::StructRet))
1872  return 0;
1873  if (CS.paramHasAttr(1, Attribute::InReg))
1874  return 0;
1875  return 4;
1876 }
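
A standalone restatement of the rule above, using hypothetical field names rather than the real ImmutableCallSite API: only a 32-bit, non-Windows, C-family call whose sret argument is not passed inreg makes the callee pop anything, and then it pops exactly the 4-byte hidden return-slot pointer.

    #include <cassert>

    // Hypothetical flattened view of the call site; the real code queries the
    // X86Subtarget and ImmutableCallSite instead.
    struct CallDesc {
      bool Is64Bit;
      bool IsWindows;
      bool IsFastOrGHC;
      bool HasSRet;
      bool SRetIsInReg;
    };

    static unsigned bytesPoppedByCallee(const CallDesc &CS) {
      if (CS.Is64Bit || CS.IsWindows || CS.IsFastOrGHC)
        return 0;
      if (!CS.HasSRet || CS.SRetIsInReg)
        return 0;
      return 4; // the hidden sret pointer the callee pops on return
    }

    int main() {
      CallDesc SRetCall = {false, false, false, true, false};
      CallDesc PlainCall = {false, false, false, false, false};
      assert(bytesPoppedByCallee(SRetCall) == 4);
      assert(bytesPoppedByCallee(PlainCall) == 0);
      return 0;
    }
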
1877 
1878 // Select either a call, or an llvm.memcpy/memmove/memset intrinsic
1879 bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
1880  const CallInst *CI = cast<CallInst>(I);
1881  const Value *Callee = CI->getCalledValue();
1882 
1883  // Handle only C and fastcc calling conventions for now.
1884  ImmutableCallSite CS(CI);
1885  CallingConv::ID CC = CS.getCallingConv();
1886  bool isWin64 = Subtarget->isCallingConvWin64(CC);
1887  if (CC != CallingConv::C && CC != CallingConv::Fast &&
1888  CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 &&
1889  CC != CallingConv::X86_64_SysV)
1890  return false;
1891 
1892  // fastcc with -tailcallopt is intended to provide a guaranteed
1893  // tail call optimization. Fastisel doesn't know how to do that.
1894  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1895  return false;
1896 
1897  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1898  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1899  bool isVarArg = FTy->isVarArg();
1900 
1901  // Don't know how to handle Win64 varargs yet. Nothing special needed for
1902  // x86-32. Special handling for x86-64 is implemented.
1903  if (isVarArg && isWin64)
1904  return false;
1905 
1906  // Fast-isel doesn't know about callee-pop yet.
1907  if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
1908  TM.Options.GuaranteedTailCallOpt))
1909  return false;
1910 
1911  // Check whether the function can return without sret-demotion.
1912  SmallVector<ISD::OutputArg, 4> Outs;
1913  GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
1914  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
1915  *FuncInfo.MF, FTy->isVarArg(),
1916  Outs, FTy->getContext());
1917  if (!CanLowerReturn)
1918  return false;
1919 
1920  // Materialize callee address in a register. FIXME: GV address can be
1921  // handled with a CALLpcrel32 instead.
1922  X86AddressMode CalleeAM;
1923  if (!X86SelectCallAddress(Callee, CalleeAM))
1924  return false;
1925  unsigned CalleeOp = 0;
1926  const GlobalValue *GV = 0;
1927  if (CalleeAM.GV != 0) {
1928  GV = CalleeAM.GV;
1929  } else if (CalleeAM.Base.Reg != 0) {
1930  CalleeOp = CalleeAM.Base.Reg;
1931  } else
1932  return false;
1933 
1934  // Deal with call operands first.
1935  SmallVector<unsigned, 8> Args;
1936  SmallVector<const Value *, 8> ArgVals;
1937  SmallVector<MVT, 8> ArgVTs;
1938  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1939  unsigned arg_size = CS.arg_size();
1940  Args.reserve(arg_size);
1941  ArgVals.reserve(arg_size);
1942  ArgVTs.reserve(arg_size);
1943  ArgFlags.reserve(arg_size);
1944  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1945  i != e; ++i) {
1946  // If we're lowering a mem intrinsic instead of a regular call, skip the
1947  // last two arguments, which should not be passed to the underlying functions.
1948  if (MemIntName && e-i <= 2)
1949  break;
1950  Value *ArgVal = *i;
1951  ISD::ArgFlagsTy Flags;
1952  unsigned AttrInd = i - CS.arg_begin() + 1;
1953  if (CS.paramHasAttr(AttrInd, Attribute::SExt))
1954  Flags.setSExt();
1955  if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
1956  Flags.setZExt();
1957 
1958  if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
1959  PointerType *Ty = cast<PointerType>(ArgVal->getType());
1960  Type *ElementTy = Ty->getElementType();
1961  unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
1962  unsigned FrameAlign = CS.getParamAlignment(AttrInd);
1963  if (!FrameAlign)
1964  FrameAlign = TLI.getByValTypeAlignment(ElementTy);
1965  Flags.setByVal();
1966  Flags.setByValSize(FrameSize);
1967  Flags.setByValAlign(FrameAlign);
1968  if (!IsMemcpySmall(FrameSize))
1969  return false;
1970  }
1971 
1972  if (CS.paramHasAttr(AttrInd, Attribute::InReg))
1973  Flags.setInReg();
1974  if (CS.paramHasAttr(AttrInd, Attribute::Nest))
1975  Flags.setNest();
1976 
1977  // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
1978  // instruction. This is safe because it is common to all fastisel supported
1979  // calling conventions on x86.
1980  if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
1981  if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
1982  CI->getBitWidth() == 16) {
1983  if (Flags.isSExt())
1984  ArgVal = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
1985  else
1986  ArgVal = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
1987  }
1988  }
1989 
1990  unsigned ArgReg;
1991 
1992  // Passing bools around ends up doing a trunc to i1 and passing it.
1993  // Codegen this as an argument + "and 1".
1994  if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
1995  cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
1996  ArgVal->hasOneUse()) {
1997  ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
1998  ArgReg = getRegForValue(ArgVal);
1999  if (ArgReg == 0) return false;
2000 
2001  MVT ArgVT;
2002  if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
2003 
2004  ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
2005  ArgVal->hasOneUse(), 1);
2006  } else {
2007  ArgReg = getRegForValue(ArgVal);
2008  }
2009 
2010  if (ArgReg == 0) return false;
2011 
2012  Type *ArgTy = ArgVal->getType();
2013  MVT ArgVT;
2014  if (!isTypeLegal(ArgTy, ArgVT))
2015  return false;
2016  if (ArgVT == MVT::x86mmx)
2017  return false;
2018  unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
2019  Flags.setOrigAlign(OriginalAlignment);
2020 
2021  Args.push_back(ArgReg);
2022  ArgVals.push_back(ArgVal);
2023  ArgVTs.push_back(ArgVT);
2024  ArgFlags.push_back(Flags);
2025  }
2026 
2027  // Analyze operands of the call, assigning locations to each operand.
2028  SmallVector<CCValAssign, 16> ArgLocs;
2029  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
2030  I->getParent()->getContext());
2031 
2032  // Allocate shadow area for Win64
2033  if (isWin64)
2034  CCInfo.AllocateStack(32, 8);
2035 
2036  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
2037 
2038  // Get a count of how many bytes are to be pushed on the stack.
2039  unsigned NumBytes = CCInfo.getNextStackOffset();
2040 
2041  // Issue CALLSEQ_START
2042  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2043  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
2044  .addImm(NumBytes);
2045 
2046  // Process arguments: walk the register/memloc assignments, inserting
2047  // copies / loads.
2048  SmallVector<unsigned, 4> RegArgs;
2049  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2050  CCValAssign &VA = ArgLocs[i];
2051  unsigned Arg = Args[VA.getValNo()];
2052  EVT ArgVT = ArgVTs[VA.getValNo()];
2053 
2054  // Promote the value if needed.
2055  switch (VA.getLocInfo()) {
2056  case CCValAssign::Full: break;
2057  case CCValAssign::SExt: {
2058  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
2059  "Unexpected extend");
2060  bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
2061  Arg, ArgVT, Arg);
2062  assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
2063  ArgVT = VA.getLocVT();
2064  break;
2065  }
2066  case CCValAssign::ZExt: {
2067  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
2068  "Unexpected extend");
2069  bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
2070  Arg, ArgVT, Arg);
2071  assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
2072  ArgVT = VA.getLocVT();
2073  break;
2074  }
2075  case CCValAssign::AExt: {
2076  assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
2077  "Unexpected extend");
2078  bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
2079  Arg, ArgVT, Arg);
2080  if (!Emitted)
2081  Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
2082  Arg, ArgVT, Arg);
2083  if (!Emitted)
2084  Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
2085  Arg, ArgVT, Arg);
2086 
2087  assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
2088  ArgVT = VA.getLocVT();
2089  break;
2090  }
2091  case CCValAssign::BCvt: {
2092  unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
2093  ISD::BITCAST, Arg, /*TODO: Kill=*/false);
2094  assert(BC != 0 && "Failed to emit a bitcast!");
2095  Arg = BC;
2096  ArgVT = VA.getLocVT();
2097  break;
2098  }
2099  case CCValAssign::VExt:
2100  // VExt has not been implemented, so this should be impossible to reach
2101  // for now. However, fall back to SelectionDAG isel once it is implemented.
2102  return false;
2103  case CCValAssign::Indirect:
2104  // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
2105  // support this.
2106  return false;
2107  }
2108 
2109  if (VA.isRegLoc()) {
2110  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2111  VA.getLocReg()).addReg(Arg);
2112  RegArgs.push_back(VA.getLocReg());
2113  } else {
2114  unsigned LocMemOffset = VA.getLocMemOffset();
2115  X86AddressMode AM;
2116  const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(
2117  getTargetMachine()->getRegisterInfo());
2118  AM.Base.Reg = RegInfo->getStackRegister();
2119  AM.Disp = LocMemOffset;
2120  const Value *ArgVal = ArgVals[VA.getValNo()];
2121  ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
2122 
2123  if (Flags.isByVal()) {
2124  X86AddressMode SrcAM;
2125  SrcAM.Base.Reg = Arg;
2126  bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
2127  assert(Res && "memcpy length already checked!"); (void)Res;
2128  } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
2129  // If this is a really simple value, emit this with the Value* version
2130  // of X86FastEmitStore. If it isn't simple, we don't want to do this,
2131  // as it can cause us to reevaluate the argument.
2132  if (!X86FastEmitStore(ArgVT, ArgVal, AM))
2133  return false;
2134  } else {
2135  if (!X86FastEmitStore(ArgVT, Arg, AM))
2136  return false;
2137  }
2138  }
2139  }
2140 
2141  // ELF / PIC requires the GOT pointer to be in the EBX register before
2142  // making function calls via the PLT.
2143  if (Subtarget->isPICStyleGOT()) {
2144  unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2145  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2146  X86::EBX).addReg(Base);
2147  }
2148 
2149  if (Subtarget->is64Bit() && isVarArg && !isWin64) {
2150  // Count the number of XMM registers allocated.
2151  static const uint16_t XMMArgRegs[] = {
2152  X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2153  X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2154  };
2155  unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
2156  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
2157  X86::AL).addImm(NumXMMRegs);
2158  }
2159 
2160  // Issue the call.
2161  MachineInstrBuilder MIB;
2162  if (CalleeOp) {
2163  // Register-indirect call.
2164  unsigned CallOpc;
2165  if (Subtarget->is64Bit())
2166  CallOpc = X86::CALL64r;
2167  else
2168  CallOpc = X86::CALL32r;
2169  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
2170  .addReg(CalleeOp);
2171 
2172  } else {
2173  // Direct call.
2174  assert(GV && "Not a direct call");
2175  unsigned CallOpc;
2176  if (Subtarget->is64Bit())
2177  CallOpc = X86::CALL64pcrel32;
2178  else
2179  CallOpc = X86::CALLpcrel32;
2180 
2181  // See if we need any target-specific flags on the GV operand.
2182  unsigned char OpFlags = 0;
2183 
2184  // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
2185  // external symbols must go through the PLT in PIC mode. If the symbol
2186  // has hidden or protected visibility, or if it is static or local, then
2187  // we don't need to use the PLT - we can directly call it.
2188  if (Subtarget->isTargetELF() &&
2189  TM.getRelocationModel() == Reloc::PIC_ &&
2190  GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
2191  OpFlags = X86II::MO_PLT;
2192  } else if (Subtarget->isPICStyleStubAny() &&
2193  (GV->isDeclaration() || GV->isWeakForLinker()) &&
2194  (!Subtarget->getTargetTriple().isMacOSX() ||
2195  Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
2196  // PC-relative references to external symbols should go through $stub,
2197  // unless we're building with the leopard linker or later, which
2198  // automatically synthesizes these stubs.
2199  OpFlags = X86II::MO_DARWIN_STUB;
2200  }
2201 
2202 
2203  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
2204  if (MemIntName)
2205  MIB.addExternalSymbol(MemIntName, OpFlags);
2206  else
2207  MIB.addGlobalAddress(GV, 0, OpFlags);
2208  }
2209 
2210  // Add a register mask with the call-preserved registers.
2211  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2212  MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
2213 
2214  // Add an implicit use GOT pointer in EBX.
2215  if (Subtarget->isPICStyleGOT())
2216  MIB.addReg(X86::EBX, RegState::Implicit);
2217 
2218  if (Subtarget->is64Bit() && isVarArg && !isWin64)
2219  MIB.addReg(X86::AL, RegState::Implicit);
2220 
2221  // Add implicit physical register uses to the call.
2222  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
2223  MIB.addReg(RegArgs[i], RegState::Implicit);
2224 
2225  // Issue CALLSEQ_END
2226  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2227  const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
2228  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
2229  .addImm(NumBytes).addImm(NumBytesCallee);
2230 
2231  // Build info for return calling conv lowering code.
2232  // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
2233  SmallVector<ISD::InputArg, 32> Ins;
2234  SmallVector<EVT, 4> RetTys;
2235  ComputeValueVTs(TLI, I->getType(), RetTys);
2236  for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
2237  EVT VT = RetTys[i];
2238  MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
2239  unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
2240  for (unsigned j = 0; j != NumRegs; ++j) {
2241  ISD::InputArg MyFlags;
2242  MyFlags.VT = RegisterVT;
2243  MyFlags.Used = !CS.getInstruction()->use_empty();
2244  if (CS.paramHasAttr(0, Attribute::SExt))
2245  MyFlags.Flags.setSExt();
2246  if (CS.paramHasAttr(0, Attribute::ZExt))
2247  MyFlags.Flags.setZExt();
2248  if (CS.paramHasAttr(0, Attribute::InReg))
2249  MyFlags.Flags.setInReg();
2250  Ins.push_back(MyFlags);
2251  }
2252  }
2253 
2254  // Now handle call return values.
2255  SmallVector<unsigned, 4> UsedRegs;
2256  SmallVector<CCValAssign, 16> RVLocs;
2257  CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
2258  I->getParent()->getContext());
2259  unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
2260  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
2261  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2262  EVT CopyVT = RVLocs[i].getValVT();
2263  unsigned CopyReg = ResultReg + i;
2264 
2265  // If this is a call to a function that returns an fp value on the x87 fp
2266  // stack, but where we prefer to use the value in xmm registers, copy it
2267  // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
2268  if ((RVLocs[i].getLocReg() == X86::ST0 ||
2269  RVLocs[i].getLocReg() == X86::ST1)) {
2270  if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
2271  CopyVT = MVT::f80;
2272  CopyReg = createResultReg(&X86::RFP80RegClass);
2273  }
2274  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
2275  CopyReg);
2276  } else {
2277  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2278  CopyReg).addReg(RVLocs[i].getLocReg());
2279  UsedRegs.push_back(RVLocs[i].getLocReg());
2280  }
2281 
2282  if (CopyVT != RVLocs[i].getValVT()) {
2283  // Round the F80 to the right size, which also moves it to the appropriate xmm
2284  // register. This is accomplished by storing the F80 value in memory and
2285  // then loading it back. Ewww...
2286  EVT ResVT = RVLocs[i].getValVT();
2287  unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
2288  unsigned MemSize = ResVT.getSizeInBits()/8;
2289  int FI = MFI.CreateStackObject(MemSize, MemSize, false);
2290  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2291  TII.get(Opc)), FI)
2292  .addReg(CopyReg);
2293  Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
2294  addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2295  TII.get(Opc), ResultReg + i), FI);
2296  }
2297  }
2298 
2299  if (RVLocs.size())
2300  UpdateValueMap(I, ResultReg, RVLocs.size());
2301 
2302  // Set all unused physreg defs as dead.
2303  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2304 
2305  return true;
2306 }
2307 
2308 
2309 bool
2310 X86FastISel::TargetSelectInstruction(const Instruction *I) {
2311  switch (I->getOpcode()) {
2312  default: break;
2313  case Instruction::Load:
2314  return X86SelectLoad(I);
2315  case Instruction::Store:
2316  return X86SelectStore(I);
2317  case Instruction::Ret:
2318  return X86SelectRet(I);
2319  case Instruction::ICmp:
2320  case Instruction::FCmp:
2321  return X86SelectCmp(I);
2322  case Instruction::ZExt:
2323  return X86SelectZExt(I);
2324  case Instruction::Br:
2325  return X86SelectBranch(I);
2326  case Instruction::Call:
2327  return X86SelectCall(I);
2328  case Instruction::LShr:
2329  case Instruction::AShr:
2330  case Instruction::Shl:
2331  return X86SelectShift(I);
2332  case Instruction::SDiv:
2333  case Instruction::UDiv:
2334  case Instruction::SRem:
2335  case Instruction::URem:
2336  return X86SelectDivRem(I);
2337  case Instruction::Select:
2338  return X86SelectSelect(I);
2339  case Instruction::Trunc:
2340  return X86SelectTrunc(I);
2341  case Instruction::FPExt:
2342  return X86SelectFPExt(I);
2343  case Instruction::FPTrunc:
2344  return X86SelectFPTrunc(I);
2345  case Instruction::IntToPtr: // Deliberate fall-through.
2346  case Instruction::PtrToInt: {
2347  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
2348  EVT DstVT = TLI.getValueType(I->getType());
2349  if (DstVT.bitsGT(SrcVT))
2350  return X86SelectZExt(I);
2351  if (DstVT.bitsLT(SrcVT))
2352  return X86SelectTrunc(I);
2353  unsigned Reg = getRegForValue(I->getOperand(0));
2354  if (Reg == 0) return false;
2355  UpdateValueMap(I, Reg);
2356  return true;
2357  }
2358  }
2359 
2360  return false;
2361 }
2362 
2363 unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
2364  MVT VT;
2365  if (!isTypeLegal(C->getType(), VT))
2366  return 0;
2367 
2368  // Can't handle alternate code models yet.
2369  if (TM.getCodeModel() != CodeModel::Small)
2370  return 0;
2371 
2372  // Get opcode and regclass of the output for the given load instruction.
2373  unsigned Opc = 0;
2374  const TargetRegisterClass *RC = NULL;
2375  switch (VT.SimpleTy) {
2376  default: return 0;
2377  case MVT::i8:
2378  Opc = X86::MOV8rm;
2379  RC = &X86::GR8RegClass;
2380  break;
2381  case MVT::i16:
2382  Opc = X86::MOV16rm;
2383  RC = &X86::GR16RegClass;
2384  break;
2385  case MVT::i32:
2386  Opc = X86::MOV32rm;
2387  RC = &X86::GR32RegClass;
2388  break;
2389  case MVT::i64:
2390  // Must be in x86-64 mode.
2391  Opc = X86::MOV64rm;
2392  RC = &X86::GR64RegClass;
2393  break;
2394  case MVT::f32:
2395  if (X86ScalarSSEf32) {
2396  Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
2397  RC = &X86::FR32RegClass;
2398  } else {
2399  Opc = X86::LD_Fp32m;
2400  RC = &X86::RFP32RegClass;
2401  }
2402  break;
2403  case MVT::f64:
2404  if (X86ScalarSSEf64) {
2405  Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
2406  RC = &X86::FR64RegClass;
2407  } else {
2408  Opc = X86::LD_Fp64m;
2409  RC = &X86::RFP64RegClass;
2410  }
2411  break;
2412  case MVT::f80:
2413  // No f80 support yet.
2414  return 0;
2415  }
2416 
2417  // Materialize addresses with LEA instructions.
2418  if (isa<GlobalValue>(C)) {
2419  X86AddressMode AM;
2420  if (X86SelectAddress(C, AM)) {
2421  // If the expression is just a basereg, then we're done; otherwise we need
2422  // to emit an LEA.
2423  if (AM.BaseType == X86AddressMode::RegBase &&
2424  AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
2425  return AM.Base.Reg;
2426 
2427  Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
2428  unsigned ResultReg = createResultReg(RC);
2429  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2430  TII.get(Opc), ResultReg), AM);
2431  return ResultReg;
2432  }
2433  return 0;
2434  }
2435 
2436  // MachineConstantPool wants an explicit alignment.
2437  unsigned Align = TD.getPrefTypeAlignment(C->getType());
2438  if (Align == 0) {
2439  // Alignment of vector types. FIXME!
2440  Align = TD.getTypeAllocSize(C->getType());
2441  }
2442 
2443  // x86-32 PIC requires a PIC base register for constant pools.
2444  unsigned PICBase = 0;
2445  unsigned char OpFlag = 0;
2446  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
2447  OpFlag = X86II::MO_PIC_BASE_OFFSET;
2448  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2449  } else if (Subtarget->isPICStyleGOT()) {
2450  OpFlag = X86II::MO_GOTOFF;
2451  PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2452  } else if (Subtarget->isPICStyleRIPRel() &&
2453  TM.getCodeModel() == CodeModel::Small) {
2454  PICBase = X86::RIP;
2455  }
2456 
2457  // Create the load from the constant pool.
2458  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
2459  unsigned ResultReg = createResultReg(RC);
2460  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2461  TII.get(Opc), ResultReg),
2462  MCPOffset, PICBase, OpFlag);
2463 
2464  return ResultReg;
2465 }
2466 
2467 unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
2468  // Fail on dynamic allocas. At this point, getRegForValue has already
2469  // checked its CSE maps, so if we're here trying to handle a dynamic
2470  // alloca, we're not going to succeed. X86SelectAddress has a
2471  // check for dynamic allocas, because it's called directly from
2472  // various places, but TargetMaterializeAlloca also needs a check
2473  // in order to avoid recursion between getRegForValue,
2474  // X86SelectAddress, and TargetMaterializeAlloca.
2475  if (!FuncInfo.StaticAllocaMap.count(C))
2476  return 0;
2477 
2478  X86AddressMode AM;
2479  if (!X86SelectAddress(C, AM))
2480  return 0;
2481  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
2482  const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
2483  unsigned ResultReg = createResultReg(RC);
2484  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2485  TII.get(Opc), ResultReg), AM);
2486  return ResultReg;
2487 }
2488 
2489 unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
2490  MVT VT;
2491  if (!isTypeLegal(CF->getType(), VT))
2492  return 0;
2493 
2494  // Get opcode and regclass for the given zero.
2495  unsigned Opc = 0;
2496  const TargetRegisterClass *RC = NULL;
2497  switch (VT.SimpleTy) {
2498  default: return 0;
2499  case MVT::f32:
2500  if (X86ScalarSSEf32) {
2501  Opc = X86::FsFLD0SS;
2502  RC = &X86::FR32RegClass;
2503  } else {
2504  Opc = X86::LD_Fp032;
2505  RC = &X86::RFP32RegClass;
2506  }
2507  break;
2508  case MVT::f64:
2509  if (X86ScalarSSEf64) {
2510  Opc = X86::FsFLD0SD;
2511  RC = &X86::FR64RegClass;
2512  } else {
2513  Opc = X86::LD_Fp064;
2514  RC = &X86::RFP64RegClass;
2515  }
2516  break;
2517  case MVT::f80:
2518  // No f80 support yet.
2519  return 0;
2520  }
2521 
2522  unsigned ResultReg = createResultReg(RC);
2523  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
2524  return ResultReg;
2525 }
2526 
2527 
2528 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2529  const LoadInst *LI) {
2530  X86AddressMode AM;
2531  if (!X86SelectAddress(LI->getOperand(0), AM))
2532  return false;
2533 
2534  const X86InstrInfo &XII = (const X86InstrInfo&)TII;
2535 
2536  unsigned Size = TD.getTypeAllocSize(LI->getType());
2537  unsigned Alignment = LI->getAlignment();
2538 
2539  SmallVector<MachineOperand, 8> AddrOps;
2540  AM.getFullAddress(AddrOps);
2541 
2542  MachineInstr *Result =
2543  XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
2544  if (Result == 0) return false;
2545 
2546  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
2547  MI->eraseFromParent();
2548  return true;
2549 }
2550 
2551 
2552 namespace llvm {
2553  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
2554  const TargetLibraryInfo *libInfo) {
2555  return new X86FastISel(funcInfo, libInfo);
2556  }
2557 }