LLVM API Documentation

AArch64ISelLowering.cpp
1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "aarch64-isel"
16 #include "AArch64.h"
17 #include "AArch64ISelLowering.h"
19 #include "AArch64TargetMachine.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/CodeGen/Analysis.h"
28 #include "llvm/IR/CallingConv.h"
29 
30 using namespace llvm;
31 
33  const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
34 
35  if (Subtarget->isTargetLinux())
36  return new AArch64LinuxTargetObjectFile();
37  if (Subtarget->isTargetELF())
38  return new TargetLoweringObjectFileELF();
39  llvm_unreachable("unknown subtarget type");
40 }
41 
43  : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
44 
45  const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
46 
47  // SIMD compares set the entire lane's bits to 1
49 
50  // Scalar register <-> type mapping
51  addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
52  addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
53 
54  if (Subtarget->hasFPARMv8()) {
55  addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
56  addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
57  addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
58  addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
59  }
60 
61  if (Subtarget->hasNEON()) {
62  // And the vectors
63  addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
64  addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
65  addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
66  addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
67  addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass);
68  addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
69  addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
70  addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
71  addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
72  addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
73  addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
74  addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
75  addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
76  addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
77  addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
78  addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
79  addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
80  }
81 
83 
84  // We combine OR nodes for bitfield and NEON BSL operations.
86 
91 
95 
96  // AArch64 does not have i1 loads, or much of anything for i1 really.
100 
105 
106  // We'll lower globals to wrappers for selection.
109 
110  // A64 instructions have the comparison predicate attached to the user of the
111  // result, but having a separate comparison is valuable for matching.
116 
121 
126 
128 
133 
137 
142 
144 
147 
152 
157 
160 
161  // Legal floating-point operations.
164 
167 
170 
173 
176 
179 
182 
185 
189 
190  // Illegal floating-point operations.
193 
196 
199 
202 
205 
208 
211 
214 
217 
220 
223 
226 
227  // Virtually no operation on f128 is legal, but LLVM can't expand them when
228  // there's a valid register class, so we need custom operations in most cases.
252 
253  // Lowering for many of the conversions is actually specified by the non-f128
254  // type. The LowerXXX function will be trivial when f128 isn't involved.
269 
270  // This prevents LLVM trying to compress double constants into a floating
271  // constant-pool entry and trying to load from there. It's of doubtful benefit
272  // for A64: we'd need LDR followed by FCVT, I believe.
276 
283 
284  setExceptionPointerRegister(AArch64::X0);
285  setExceptionSelectorRegister(AArch64::X1);
286 
287  if (Subtarget->hasNEON()) {
304 
317 
327 
341 
345 
349 
353 
357 
361 
365  }
366 }
367 
369  // It's reasonably important that this value matches the "natural" legal
370  // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
371  // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
372  if (!VT.isVector()) return MVT::i32;
374 }
375 
376 static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
377  unsigned &LdrOpc,
378  unsigned &StrOpc) {
379  static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
380  AArch64::LDXR_word, AArch64::LDXR_dword};
381  static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
382  AArch64::LDAXR_word, AArch64::LDAXR_dword};
383  static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
384  AArch64::STXR_word, AArch64::STXR_dword};
385  static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword,
386  AArch64::STLXR_word, AArch64::STLXR_dword};
387 
388  const unsigned *LoadOps, *StoreOps;
389  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
390  LoadOps = LoadAcqs;
391  else
392  LoadOps = LoadBares;
393 
394  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
395  StoreOps = StoreRels;
396  else
397  StoreOps = StoreBares;
398 
399  assert(isPowerOf2_32(Size) && Size <= 8 &&
400  "unsupported size for atomic binary op!");
401 
402  LdrOpc = LoadOps[Log2_32(Size)];
403  StrOpc = StoreOps[Log2_32(Size)];
404 }
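The selection above is purely table-driven: any acquire-flavoured ordering picks an LDAXR variant and any release-flavoured ordering picks an STLXR variant, with the size choosing byte/halfword/word/doubleword. A minimal standalone sketch of that predicate (illustrative names only, not part of the backend):

// Sketch only: std::-style enumerators stand in for LLVM's AtomicOrdering.
enum Ordering { Monotonic, Acquire, Release, AcquireRelease, SequentiallyConsistent };

static const char *pickExclusivePair(Ordering Ord) {
  bool AcquireLoad  = Ord == Acquire || Ord == AcquireRelease ||
                      Ord == SequentiallyConsistent;
  bool ReleaseStore = Ord == Release || Ord == AcquireRelease ||
                      Ord == SequentiallyConsistent;
  if (AcquireLoad && ReleaseStore) return "LDAXR + STLXR";
  if (AcquireLoad)                 return "LDAXR + STXR";
  if (ReleaseStore)                return "LDXR + STLXR";
  return "LDXR + STXR";
}
// pickExclusivePair(SequentiallyConsistent) -> "LDAXR + STLXR"
// pickExclusivePair(Acquire)                -> "LDAXR + STXR"
// pickExclusivePair(Monotonic)              -> "LDXR + STXR"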
405 
408  unsigned Size,
409  unsigned BinOpcode) const {
410  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
412 
413  const BasicBlock *LLVM_BB = BB->getBasicBlock();
414  MachineFunction *MF = BB->getParent();
416  ++It;
417 
418  unsigned dest = MI->getOperand(0).getReg();
419  unsigned ptr = MI->getOperand(1).getReg();
420  unsigned incr = MI->getOperand(2).getReg();
421  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
422  DebugLoc dl = MI->getDebugLoc();
423 
425 
426  unsigned ldrOpc, strOpc;
427  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
428 
429  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
430  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
431  MF->insert(It, loopMBB);
432  MF->insert(It, exitMBB);
433 
434  // Transfer the remainder of BB and its successor edges to exitMBB.
435  exitMBB->splice(exitMBB->begin(), BB,
437  BB->end());
438  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
439 
440  const TargetRegisterClass *TRC
441  = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
442  unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
443 
444  // thisMBB:
445  // ...
446  // fallthrough --> loopMBB
447  BB->addSuccessor(loopMBB);
448 
449  // loopMBB:
450  // ldxr dest, ptr
451  // <binop> scratch, dest, incr
452  // stxr stxr_status, scratch, ptr
453  // cbnz stxr_status, loopMBB
454  // fallthrough --> exitMBB
455  BB = loopMBB;
456  BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
457  if (BinOpcode) {
458  // All arithmetic operations we'll be creating are designed to take an extra
459  // shift or extend operand, which we can conveniently set to zero.
460 
461  // Operand order needs to go the other way for NAND.
462  if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
463  BuildMI(BB, dl, TII->get(BinOpcode), scratch)
464  .addReg(incr).addReg(dest).addImm(0);
465  else
466  BuildMI(BB, dl, TII->get(BinOpcode), scratch)
467  .addReg(dest).addReg(incr).addImm(0);
468  }
469 
470  // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
471  unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
472  MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
473 
474  BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
475  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
476  .addReg(stxr_status).addMBB(loopMBB);
477 
478  BB->addSuccessor(loopMBB);
479  BB->addSuccessor(exitMBB);
480 
481  // exitMBB:
482  // ...
483  BB = exitMBB;
484 
485  MI->eraseFromParent(); // The instruction is gone now.
486 
487  return BB;
488 }
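emitAtomicBinary expands each ATOMIC_LOAD_* pseudo into the load-exclusive / store-exclusive retry loop sketched in the comments above. As a hedged illustration, a sequentially consistent fetch_add in C++ is expected to reach this path via ATOMIC_LOAD_ADD_I32 and come out looking roughly like the commented assembly (register names are made up; the allocator will pick its own):

#include <atomic>

// Expected expansion (approximate):
//   loop: ldaxr w8, [x0]       ; load-exclusive, acquire for seq_cst
//         add   w9, w8, w1     ; BinOpcode = ADDwww_lsl, shift amount 0
//         stlxr w10, w9, [x0]  ; store-exclusive, release for seq_cst
//         cbnz  w10, loop      ; retry if the exclusive store failed
// The loaded value (dest) is the function's return value.
int fetch_add_example(std::atomic<int> &Counter, int Amount) {
  return Counter.fetch_add(Amount, std::memory_order_seq_cst);
}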
489 
492  MachineBasicBlock *BB,
493  unsigned Size,
494  unsigned CmpOp,
495  A64CC::CondCodes Cond) const {
497 
498  const BasicBlock *LLVM_BB = BB->getBasicBlock();
499  MachineFunction *MF = BB->getParent();
501  ++It;
502 
503  unsigned dest = MI->getOperand(0).getReg();
504  unsigned ptr = MI->getOperand(1).getReg();
505  unsigned incr = MI->getOperand(2).getReg();
506  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
507 
508  unsigned oldval = dest;
509  DebugLoc dl = MI->getDebugLoc();
510 
512  const TargetRegisterClass *TRC, *TRCsp;
513  if (Size == 8) {
514  TRC = &AArch64::GPR64RegClass;
515  TRCsp = &AArch64::GPR64xspRegClass;
516  } else {
517  TRC = &AArch64::GPR32RegClass;
518  TRCsp = &AArch64::GPR32wspRegClass;
519  }
520 
521  unsigned ldrOpc, strOpc;
522  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
523 
524  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
525  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
526  MF->insert(It, loopMBB);
527  MF->insert(It, exitMBB);
528 
529  // Transfer the remainder of BB and its successor edges to exitMBB.
530  exitMBB->splice(exitMBB->begin(), BB,
532  BB->end());
533  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
534 
535  unsigned scratch = MRI.createVirtualRegister(TRC);
536  MRI.constrainRegClass(scratch, TRCsp);
537 
538  // thisMBB:
539  // ...
540  // fallthrough --> loopMBB
541  BB->addSuccessor(loopMBB);
542 
543  // loopMBB:
544  // ldxr dest, ptr
545  // cmp incr, dest (, sign extend if necessary)
546  // csel scratch, dest, incr, cond
547  // stxr stxr_status, scratch, ptr
548  // cbnz stxr_status, loopMBB
549  // fallthrough --> exitMBB
550  BB = loopMBB;
551  BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
552 
553  // Build compare and cmov instructions.
554  MRI.constrainRegClass(incr, TRCsp);
555  BuildMI(BB, dl, TII->get(CmpOp))
556  .addReg(incr).addReg(oldval).addImm(0);
557 
558  BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
559  scratch)
560  .addReg(oldval).addReg(incr).addImm(Cond);
561 
562  unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
563  MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
564 
565  BuildMI(BB, dl, TII->get(strOpc), stxr_status)
566  .addReg(scratch).addReg(ptr);
567  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
568  .addReg(stxr_status).addMBB(loopMBB);
569 
570  BB->addSuccessor(loopMBB);
571  BB->addSuccessor(exitMBB);
572 
573  // exitMBB:
574  // ...
575  BB = exitMBB;
576 
577  MI->eraseFromParent(); // The instruction is gone now.
578 
579  return BB;
580 }
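The min/max flavours have no single ALU opcode, so the loop above uses a compare followed by CSEL: with the operands ordered as built above (cmp incr, oldval; csel scratch, oldval, incr, Cond), the stored value for ATOMIC_LOAD_MIN_I32 (Cond = GT) works out to the signed minimum. A plain C++ restatement of that one step, for illustration only:

// scratch = (incr > oldval) ? oldval : incr, i.e. the signed minimum;
// 'dest' still returns the previously stored value, as atomicrmw requires.
static inline int atomic_min_step(int oldval, int incr) {
  return (incr > oldval) ? oldval : incr;
}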
581 
584  MachineBasicBlock *BB,
585  unsigned Size) const {
586  unsigned dest = MI->getOperand(0).getReg();
587  unsigned ptr = MI->getOperand(1).getReg();
588  unsigned oldval = MI->getOperand(2).getReg();
589  unsigned newval = MI->getOperand(3).getReg();
590  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
592  DebugLoc dl = MI->getDebugLoc();
593 
595  const TargetRegisterClass *TRCsp;
596  TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
597 
598  unsigned ldrOpc, strOpc;
599  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
600 
601  MachineFunction *MF = BB->getParent();
602  const BasicBlock *LLVM_BB = BB->getBasicBlock();
604  ++It; // insert the new blocks after the current block
605 
606  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
607  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
608  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
609  MF->insert(It, loop1MBB);
610  MF->insert(It, loop2MBB);
611  MF->insert(It, exitMBB);
612 
613  // Transfer the remainder of BB and its successor edges to exitMBB.
614  exitMBB->splice(exitMBB->begin(), BB,
616  BB->end());
617  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
618 
619  // thisMBB:
620  // ...
621  // fallthrough --> loop1MBB
622  BB->addSuccessor(loop1MBB);
623 
624  // loop1MBB:
625  // ldxr dest, [ptr]
626  // cmp dest, oldval
627  // b.ne exitMBB
628  BB = loop1MBB;
629  BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
630 
631  unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
632  MRI.constrainRegClass(dest, TRCsp);
633  BuildMI(BB, dl, TII->get(CmpOp))
634  .addReg(dest).addReg(oldval).addImm(0);
635  BuildMI(BB, dl, TII->get(AArch64::Bcc))
636  .addImm(A64CC::NE).addMBB(exitMBB);
637  BB->addSuccessor(loop2MBB);
638  BB->addSuccessor(exitMBB);
639 
640  // loop2MBB:
641  // strex stxr_status, newval, [ptr]
642  // cbnz stxr_status, loop1MBB
643  BB = loop2MBB;
644  unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
645  MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
646 
647  BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
648  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
649  .addReg(stxr_status).addMBB(loop1MBB);
650  BB->addSuccessor(loop1MBB);
651  BB->addSuccessor(exitMBB);
652 
653  // exitMBB:
654  // ...
655  BB = exitMBB;
656 
657  MI->eraseFromParent(); // The instruction is gone now.
658 
659  return BB;
660 }
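emitAtomicCmpSwap builds the classic two-block compare-and-swap loop: load-exclusive, compare against the expected value, bail out on mismatch, otherwise attempt the store-exclusive and retry if the reservation was lost. A C++ compare-exchange such as the one below is expected to be lowered through ATOMIC_CMP_SWAP_I32 into roughly that shape (the extra write-back of the observed value into Expected is handled outside this loop):

#include <atomic>

// Approximate shape of the expansion (illustrative register names):
//   loop1: ldaxr w8, [x0]      ; dest = *ptr
//          cmp   w8, w1        ; compare against oldval
//          b.ne  done          ; mismatch: leave dest as the result
//   loop2: stlxr w9, w2, [x0]  ; try to store newval
//          cbnz  w9, loop1     ; reservation lost: start over
//   done:
bool cas_example(std::atomic<int> &V, int Expected, int Desired) {
  return V.compare_exchange_strong(Expected, Desired);
}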
661 
664  MachineBasicBlock *MBB) const {
665  // We materialise the F128CSEL pseudo-instruction using conditional branches
666  // and loads, giving an instruction sequence like:
667  // str q0, [sp]
668  // b.ne IfTrue
669  // b Finish
670  // IfTrue:
671  // str q1, [sp]
672  // Finish:
673  // ldr q0, [sp]
674  //
675  // Using virtual registers would probably not be beneficial since COPY
676  // instructions are expensive for f128 (there's no actual instruction to
677  // implement them).
678  //
679  // An alternative would be to do an integer-CSEL on some address. E.g.:
680  // mov x0, sp
681  // add x1, sp, #16
682  // str q0, [x0]
683  // str q1, [x1]
684  // csel x0, x0, x1, ne
685  // ldr q0, [x0]
686  //
687  // It's unclear which approach is actually optimal.
689  MachineFunction *MF = MBB->getParent();
690  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
691  DebugLoc DL = MI->getDebugLoc();
692  MachineFunction::iterator It = MBB;
693  ++It;
694 
695  unsigned DestReg = MI->getOperand(0).getReg();
696  unsigned IfTrueReg = MI->getOperand(1).getReg();
697  unsigned IfFalseReg = MI->getOperand(2).getReg();
698  unsigned CondCode = MI->getOperand(3).getImm();
699  bool NZCVKilled = MI->getOperand(4).isKill();
700 
701  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
702  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
703  MF->insert(It, TrueBB);
704  MF->insert(It, EndBB);
705 
706  // Transfer rest of current basic-block to EndBB
707  EndBB->splice(EndBB->begin(), MBB,
709  MBB->end());
711 
712  // We need somewhere to store the f128 value needed.
713  int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
714 
715  // [... start of incoming MBB ...]
716  // str qIFFALSE, [sp]
717  // b.cc IfTrue
718  // b Done
719  BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
720  .addReg(IfFalseReg)
721  .addFrameIndex(ScratchFI)
722  .addImm(0);
723  BuildMI(MBB, DL, TII->get(AArch64::Bcc))
724  .addImm(CondCode)
725  .addMBB(TrueBB);
726  BuildMI(MBB, DL, TII->get(AArch64::Bimm))
727  .addMBB(EndBB);
728  MBB->addSuccessor(TrueBB);
729  MBB->addSuccessor(EndBB);
730 
731  if (!NZCVKilled) {
732  // NZCV is live-through TrueBB.
733  TrueBB->addLiveIn(AArch64::NZCV);
734  EndBB->addLiveIn(AArch64::NZCV);
735  }
736 
737  // IfTrue:
738  // str qIFTRUE, [sp]
739  BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
740  .addReg(IfTrueReg)
741  .addFrameIndex(ScratchFI)
742  .addImm(0);
743 
744  // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
745  // blocks.
746  TrueBB->addSuccessor(EndBB);
747 
748  // Done:
749  // ldr qDEST, [sp]
750  // [... rest of incoming MBB ...]
751  MachineInstr *StartOfEnd = EndBB->begin();
752  BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
753  .addFrameIndex(ScratchFI)
754  .addImm(0);
755 
756  MI->eraseFromParent();
757  return EndBB;
758 }
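On AArch64, long double is the IEEE quad (f128) format, so an ordinary ternary select between two long double values is the kind of source expected to produce the F128CSEL pseudo that EmitF128CSEL materialises with the spill/branch/reload sequence described above. A minimal trigger, for illustration:

// Expected to become a select on f128 operands and hence F128CSEL.
long double f128_select_example(bool Cond, long double A, long double B) {
  return Cond ? A : B;
}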
759 
762  MachineBasicBlock *MBB) const {
763  switch (MI->getOpcode()) {
764  default: llvm_unreachable("Unhandled instruction with custom inserter");
765  case AArch64::F128CSEL:
766  return EmitF128CSEL(MI, MBB);
767  case AArch64::ATOMIC_LOAD_ADD_I8:
768  return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
769  case AArch64::ATOMIC_LOAD_ADD_I16:
770  return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
771  case AArch64::ATOMIC_LOAD_ADD_I32:
772  return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
773  case AArch64::ATOMIC_LOAD_ADD_I64:
774  return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
775 
776  case AArch64::ATOMIC_LOAD_SUB_I8:
777  return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
778  case AArch64::ATOMIC_LOAD_SUB_I16:
779  return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
780  case AArch64::ATOMIC_LOAD_SUB_I32:
781  return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
782  case AArch64::ATOMIC_LOAD_SUB_I64:
783  return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
784 
785  case AArch64::ATOMIC_LOAD_AND_I8:
786  return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
787  case AArch64::ATOMIC_LOAD_AND_I16:
788  return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
789  case AArch64::ATOMIC_LOAD_AND_I32:
790  return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
791  case AArch64::ATOMIC_LOAD_AND_I64:
792  return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
793 
794  case AArch64::ATOMIC_LOAD_OR_I8:
795  return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
796  case AArch64::ATOMIC_LOAD_OR_I16:
797  return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
798  case AArch64::ATOMIC_LOAD_OR_I32:
799  return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
800  case AArch64::ATOMIC_LOAD_OR_I64:
801  return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
802 
803  case AArch64::ATOMIC_LOAD_XOR_I8:
804  return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
805  case AArch64::ATOMIC_LOAD_XOR_I16:
806  return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
807  case AArch64::ATOMIC_LOAD_XOR_I32:
808  return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
809  case AArch64::ATOMIC_LOAD_XOR_I64:
810  return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
811 
812  case AArch64::ATOMIC_LOAD_NAND_I8:
813  return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
814  case AArch64::ATOMIC_LOAD_NAND_I16:
815  return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
816  case AArch64::ATOMIC_LOAD_NAND_I32:
817  return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
818  case AArch64::ATOMIC_LOAD_NAND_I64:
819  return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
820 
821  case AArch64::ATOMIC_LOAD_MIN_I8:
822  return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
823  case AArch64::ATOMIC_LOAD_MIN_I16:
824  return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
825  case AArch64::ATOMIC_LOAD_MIN_I32:
826  return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
827  case AArch64::ATOMIC_LOAD_MIN_I64:
828  return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
829 
830  case AArch64::ATOMIC_LOAD_MAX_I8:
831  return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
832  case AArch64::ATOMIC_LOAD_MAX_I16:
833  return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
834  case AArch64::ATOMIC_LOAD_MAX_I32:
835  return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
836  case AArch64::ATOMIC_LOAD_MAX_I64:
837  return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
838 
839  case AArch64::ATOMIC_LOAD_UMIN_I8:
840  return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
841  case AArch64::ATOMIC_LOAD_UMIN_I16:
842  return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
843  case AArch64::ATOMIC_LOAD_UMIN_I32:
844  return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
845  case AArch64::ATOMIC_LOAD_UMIN_I64:
846  return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
847 
848  case AArch64::ATOMIC_LOAD_UMAX_I8:
849  return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
850  case AArch64::ATOMIC_LOAD_UMAX_I16:
851  return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
852  case AArch64::ATOMIC_LOAD_UMAX_I32:
853  return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
854  case AArch64::ATOMIC_LOAD_UMAX_I64:
855  return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
856 
857  case AArch64::ATOMIC_SWAP_I8:
858  return emitAtomicBinary(MI, MBB, 1, 0);
859  case AArch64::ATOMIC_SWAP_I16:
860  return emitAtomicBinary(MI, MBB, 2, 0);
861  case AArch64::ATOMIC_SWAP_I32:
862  return emitAtomicBinary(MI, MBB, 4, 0);
863  case AArch64::ATOMIC_SWAP_I64:
864  return emitAtomicBinary(MI, MBB, 8, 0);
865 
866  case AArch64::ATOMIC_CMP_SWAP_I8:
867  return emitAtomicCmpSwap(MI, MBB, 1);
868  case AArch64::ATOMIC_CMP_SWAP_I16:
869  return emitAtomicCmpSwap(MI, MBB, 2);
870  case AArch64::ATOMIC_CMP_SWAP_I32:
871  return emitAtomicCmpSwap(MI, MBB, 4);
872  case AArch64::ATOMIC_CMP_SWAP_I64:
873  return emitAtomicCmpSwap(MI, MBB, 8);
874  }
875 }
876 
877 
878 const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
879  switch (Opcode) {
880  case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
881  case AArch64ISD::Call: return "AArch64ISD::Call";
882  case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
883  case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
884  case AArch64ISD::BFI: return "AArch64ISD::BFI";
885  case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
886  case AArch64ISD::Ret: return "AArch64ISD::Ret";
887  case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
888  case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
889  case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
890  case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
891  case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
892  case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
893  case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
894  case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
895 
897  return "AArch64ISD::NEON_BSL";
899  return "AArch64ISD::NEON_MOVIMM";
901  return "AArch64ISD::NEON_MVNIMM";
903  return "AArch64ISD::NEON_FMOVIMM";
905  return "AArch64ISD::NEON_CMP";
907  return "AArch64ISD::NEON_CMPZ";
909  return "AArch64ISD::NEON_TST";
911  return "AArch64ISD::NEON_QSHLs";
913  return "AArch64ISD::NEON_QSHLu";
915  return "AArch64ISD::NEON_VDUP";
917  return "AArch64ISD::NEON_VDUPLANE";
919  return "AArch64ISD::NEON_REV16";
921  return "AArch64ISD::NEON_REV32";
923  return "AArch64ISD::NEON_REV64";
925  return "AArch64ISD::NEON_UZP1";
927  return "AArch64ISD::NEON_UZP2";
929  return "AArch64ISD::NEON_ZIP1";
931  return "AArch64ISD::NEON_ZIP2";
933  return "AArch64ISD::NEON_TRN1";
935  return "AArch64ISD::NEON_TRN2";
937  return "AArch64ISD::NEON_LD1_UPD";
939  return "AArch64ISD::NEON_LD2_UPD";
941  return "AArch64ISD::NEON_LD3_UPD";
943  return "AArch64ISD::NEON_LD4_UPD";
945  return "AArch64ISD::NEON_ST1_UPD";
947  return "AArch64ISD::NEON_ST2_UPD";
949  return "AArch64ISD::NEON_ST3_UPD";
951  return "AArch64ISD::NEON_ST4_UPD";
953  return "AArch64ISD::NEON_LD1x2_UPD";
955  return "AArch64ISD::NEON_LD1x3_UPD";
957  return "AArch64ISD::NEON_LD1x4_UPD";
959  return "AArch64ISD::NEON_ST1x2_UPD";
961  return "AArch64ISD::NEON_ST1x3_UPD";
963  return "AArch64ISD::NEON_ST1x4_UPD";
965  return "AArch64ISD::NEON_LD2DUP";
967  return "AArch64ISD::NEON_LD3DUP";
969  return "AArch64ISD::NEON_LD4DUP";
971  return "AArch64ISD::NEON_LD2DUP_UPD";
973  return "AArch64ISD::NEON_LD3DUP_UPD";
975  return "AArch64ISD::NEON_LD4DUP_UPD";
977  return "AArch64ISD::NEON_LD2LN_UPD";
979  return "AArch64ISD::NEON_LD3LN_UPD";
981  return "AArch64ISD::NEON_LD4LN_UPD";
983  return "AArch64ISD::NEON_ST2LN_UPD";
985  return "AArch64ISD::NEON_ST3LN_UPD";
987  return "AArch64ISD::NEON_ST4LN_UPD";
989  return "AArch64ISD::NEON_VEXTRACT";
990  default:
991  return NULL;
992  }
993 }
994 
995 static const uint16_t AArch64FPRArgRegs[] = {
996  AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
997  AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
998 };
1000 
1001 static const uint16_t AArch64ArgRegs[] = {
1002  AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
1003  AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
1004 };
1006 
1007 static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
1008  CCValAssign::LocInfo LocInfo,
1009  ISD::ArgFlagsTy ArgFlags, CCState &State) {
1010  // Mark all remaining general purpose registers as allocated. We don't
1011  // backtrack: if (for example) an i128 gets put on the stack, no subsequent
1012  // i64 will go in registers (C.11).
1013  for (unsigned i = 0; i < NumArgRegs; ++i)
1014  State.AllocateReg(AArch64ArgRegs[i]);
1015 
1016  return false;
1017 }
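The classic trigger for rule C.11 is an __int128 argument that needs an even-aligned register pair when only one integer register remains: it is placed on the stack, and CC_AArch64NoMoreRegs then marks every remaining GPR as used so that later integer arguments also go on the stack rather than back into the free register. A hypothetical signature illustrating this (not taken from the source):

// 'h' needs an even-aligned register pair but only x7 is left, so it goes on
// the stack; after CC_AArch64NoMoreRegs runs, 'i' is also passed on the stack
// even though x7 was never used.
long c11_example(long a, long b, long c, long d,
                 long e, long f, long g, __int128 h, long i) {
  return a + i + (long)h;
}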
1018 
1019 #include "AArch64GenCallingConv.inc"
1020 
1022 
1023  switch(CC) {
1024  default: llvm_unreachable("Unsupported calling convention");
1025  case CallingConv::Fast:
1026  case CallingConv::C:
1027  return CC_A64_APCS;
1028  }
1029 }
1030 
1031 void
1033  SDLoc DL, SDValue &Chain) const {
1034  MachineFunction &MF = DAG.getMachineFunction();
1035  MachineFrameInfo *MFI = MF.getFrameInfo();
1036  AArch64MachineFunctionInfo *FuncInfo
1038 
1039  SmallVector<SDValue, 8> MemOps;
1040 
1041  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
1042  NumArgRegs);
1043  unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
1044  NumFPRArgRegs);
1045 
1046  unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
1047  int GPRIdx = 0;
1048  if (GPRSaveSize != 0) {
1049  GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
1050 
1051  SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
1052 
1053  for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
1054  unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
1055  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
1056  SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
1058  false, false, 0);
1059  MemOps.push_back(Store);
1060  FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
1061  DAG.getConstant(8, getPointerTy()));
1062  }
1063  }
1064 
1065  if (getSubtarget()->hasFPARMv8()) {
1066  unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
1067  int FPRIdx = 0;
1068  // According to the AArch64 Procedure Call Standard, section B.1/B.3, we
1069  // can omit a register save area if we know we'll never use registers of
1070  // that class.
1071  if (FPRSaveSize != 0) {
1072  FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
1073 
1074  SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
1075 
1076  for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
1077  unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
1078  &AArch64::FPR128RegClass);
1079  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
1080  SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
1082  false, false, 0);
1083  MemOps.push_back(Store);
1084  FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
1085  DAG.getConstant(16, getPointerTy()));
1086  }
1087  }
1088  FuncInfo->setVariadicFPRIdx(FPRIdx);
1089  FuncInfo->setVariadicFPRSize(FPRSaveSize);
1090  }
1091 
1092  int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
1093 
1094  FuncInfo->setVariadicStackIdx(StackIdx);
1095  FuncInfo->setVariadicGPRIdx(GPRIdx);
1096  FuncInfo->setVariadicGPRSize(GPRSaveSize);
1097 
1098  if (!MemOps.empty()) {
1099  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
1100  MemOps.size());
1101  }
1102 }
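For a va_list to work, any argument registers that were not consumed by the named parameters have to be dumped to memory on entry; the code above sizes the two save areas accordingly (8 bytes per unused Xn, 16 per unused Qn when FP/SIMD is present). A variadic callee like the following, with a single named argument, is therefore expected to spill x1..x7 (56 bytes) plus q0..q7 (128 bytes) into these areas:

#include <stdarg.h>

// Illustrative variadic callee exercising SaveVarArgRegisters.
int sum_example(int count, ...) {
  va_list ap;
  va_start(ap, count);
  int total = 0;
  for (int i = 0; i < count; ++i)
    total += va_arg(ap, int);
  va_end(ap);
  return total;
}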
1103 
1104 
1105 SDValue
1107  CallingConv::ID CallConv, bool isVarArg,
1109  SDLoc dl, SelectionDAG &DAG,
1110  SmallVectorImpl<SDValue> &InVals) const {
1111  MachineFunction &MF = DAG.getMachineFunction();
1112  AArch64MachineFunctionInfo *FuncInfo
1114  MachineFrameInfo *MFI = MF.getFrameInfo();
1115  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
1116 
1118  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1119  getTargetMachine(), ArgLocs, *DAG.getContext());
1120  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
1121 
1122  SmallVector<SDValue, 16> ArgValues;
1123 
1124  SDValue ArgValue;
1125  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1126  CCValAssign &VA = ArgLocs[i];
1127  ISD::ArgFlagsTy Flags = Ins[i].Flags;
1128 
1129  if (Flags.isByVal()) {
1130  // Byval is used for small structs and HFAs in the PCS, but the system
1131  // should work in a non-compliant manner for larger structs.
1132  EVT PtrTy = getPointerTy();
1133  int Size = Flags.getByValSize();
1134  unsigned NumRegs = (Size + 7) / 8;
1135 
1136  unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
1137  VA.getLocMemOffset(),
1138  false);
1139  SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
1140  InVals.push_back(FrameIdxN);
1141 
1142  continue;
1143  } else if (VA.isRegLoc()) {
1144  MVT RegVT = VA.getLocVT();
1145  const TargetRegisterClass *RC = getRegClassFor(RegVT);
1146  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1147 
1148  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1149  } else { // VA.isRegLoc()
1150  assert(VA.isMemLoc());
1151 
1152  int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
1153  VA.getLocMemOffset(), true);
1154 
1155  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1156  ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
1158  false, false, false, 0);
1159 
1160 
1161  }
1162 
1163  switch (VA.getLocInfo()) {
1164  default: llvm_unreachable("Unknown loc info!");
1165  case CCValAssign::Full: break;
1166  case CCValAssign::BCvt:
1167  ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
1168  break;
1169  case CCValAssign::SExt:
1170  case CCValAssign::ZExt:
1171  case CCValAssign::AExt: {
1172  unsigned DestSize = VA.getValVT().getSizeInBits();
1173  unsigned DestSubReg;
1174 
1175  switch (DestSize) {
1176  case 8: DestSubReg = AArch64::sub_8; break;
1177  case 16: DestSubReg = AArch64::sub_16; break;
1178  case 32: DestSubReg = AArch64::sub_32; break;
1179  case 64: DestSubReg = AArch64::sub_64; break;
1180  default: llvm_unreachable("Unexpected argument promotion");
1181  }
1182 
1184  VA.getValVT(), ArgValue,
1185  DAG.getTargetConstant(DestSubReg, MVT::i32)),
1186  0);
1187  break;
1188  }
1189  }
1190 
1191  InVals.push_back(ArgValue);
1192  }
1193 
1194  if (isVarArg)
1195  SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
1196 
1197  unsigned StackArgSize = CCInfo.getNextStackOffset();
1198  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
1199  // This is a non-standard ABI so by fiat I say we're allowed to make full
1200  // use of the stack area to be popped, which must be aligned to 16 bytes in
1201  // any case:
1202  StackArgSize = RoundUpToAlignment(StackArgSize, 16);
1203 
1204  // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
1205  // a multiple of 16.
1206  FuncInfo->setArgumentStackToRestore(StackArgSize);
1207 
1208  // This realignment carries over to the available bytes below. Our own
1209  // callers will guarantee the space is free by giving an aligned value to
1210  // CALLSEQ_START.
1211  }
1212  // Even if we're not expected to free up the space, it's useful to know how
1213  // much is there while considering tail calls (because we can reuse it).
1214  FuncInfo->setBytesInStackArgArea(StackArgSize);
1215 
1216  return Chain;
1217 }
1218 
1219 SDValue
1221  CallingConv::ID CallConv, bool isVarArg,
1222  const SmallVectorImpl<ISD::OutputArg> &Outs,
1223  const SmallVectorImpl<SDValue> &OutVals,
1224  SDLoc dl, SelectionDAG &DAG) const {
1225  // CCValAssign - represent the assignment of the return value to a location.
1227 
1228  // CCState - Info about the registers and stack slots.
1229  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1230  getTargetMachine(), RVLocs, *DAG.getContext());
1231 
1232  // Analyze outgoing return values.
1233  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
1234 
1235  SDValue Flag;
1236  SmallVector<SDValue, 4> RetOps(1, Chain);
1237 
1238  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
1239  // PCS: "If the type, T, of the result of a function is such that
1240  // void func(T arg) would require that arg be passed as a value in a
1241  // register (or set of registers) according to the rules in 5.4, then the
1242  // result is returned in the same registers as would be used for such an
1243  // argument.
1244  //
1245  // Otherwise, the caller shall reserve a block of memory of sufficient
1246  // size and alignment to hold the result. The address of the memory block
1247  // shall be passed as an additional argument to the function in x8."
1248  //
1249  // This is implemented in two places. The register-return values are dealt
1250  // with here, more complex returns are passed as an sret parameter, which
1251  // means we don't have to worry about it during actual return.
1252  CCValAssign &VA = RVLocs[i];
1253  assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
1254 
1255 
1256  SDValue Arg = OutVals[i];
1257 
1258  // There's no convenient note in the ABI about this as there is for normal
1259  // arguments, but it says return values are passed in the same registers as
1260  // an argument would be. I believe that includes the comments about
1261  // unspecified higher bits, putting the burden of widening on the *caller*
1262  // for return values.
1263  switch (VA.getLocInfo()) {
1264  default: llvm_unreachable("Unknown loc info");
1265  case CCValAssign::Full: break;
1266  case CCValAssign::SExt:
1267  case CCValAssign::ZExt:
1268  case CCValAssign::AExt:
1269  // Floating-point values should only be extended when they're going into
1270  // memory, which can't happen here so an integer extend is acceptable.
1271  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1272  break;
1273  case CCValAssign::BCvt:
1274  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1275  break;
1276  }
1277 
1278  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1279  Flag = Chain.getValue(1);
1280  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
1281  }
1282 
1283  RetOps[0] = Chain; // Update chain.
1284 
1285  // Add the flag if we have it.
1286  if (Flag.getNode())
1287  RetOps.push_back(Flag);
1288 
1289  return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
1290  &RetOps[0], RetOps.size());
1291 }
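As the quoted PCS text says, there are two return styles: results small enough to travel in registers are handled by the loop above, while anything larger is turned into an sret pointer (passed in x8) before this function ever sees it. Two plain C++ types illustrating the split, under the usual AAPCS64 rules:

struct Small { long a, b; };   // 16 bytes: expected back in x0/x1
struct Large { long v[8]; };   // too big: caller supplies a buffer via x8

Small ret_in_regs() { Small s = {1, 2}; return s; }  // register-return path above
Large ret_via_x8()  { Large l = {};    return l; }   // lowered as an sret argument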
1292 
1293 SDValue
1295  SmallVectorImpl<SDValue> &InVals) const {
1296  SelectionDAG &DAG = CLI.DAG;
1297  SDLoc &dl = CLI.DL;
1299  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1301  SDValue Chain = CLI.Chain;
1302  SDValue Callee = CLI.Callee;
1303  bool &IsTailCall = CLI.IsTailCall;
1304  CallingConv::ID CallConv = CLI.CallConv;
1305  bool IsVarArg = CLI.IsVarArg;
1306 
1307  MachineFunction &MF = DAG.getMachineFunction();
1308  AArch64MachineFunctionInfo *FuncInfo
1310  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
1311  bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
1312  bool IsSibCall = false;
1313 
1314  if (IsTailCall) {
1315  IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1316  IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1317  Outs, OutVals, Ins, DAG);
1318 
1319  // A sibling call is one where we're under the usual C ABI and not planning
1320  // to change that but can still do a tail call:
1321  if (!TailCallOpt && IsTailCall)
1322  IsSibCall = true;
1323  }
1324 
1326  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
1327  getTargetMachine(), ArgLocs, *DAG.getContext());
1328  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
1329 
1330  // On AArch64 (and all other architectures I'm aware of) the most this has to
1331  // do is adjust the stack pointer.
1332  unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
1333  if (IsSibCall) {
1334  // Since we're not changing the ABI to make this a tail call, the memory
1335  // operands are already available in the caller's incoming argument space.
1336  NumBytes = 0;
1337  }
1338 
1339  // FPDiff is the byte offset of the call's argument area from the callee's.
1340  // Stores to callee stack arguments will be placed in FixedStackSlots offset
1341  // by this amount for a tail call. In a sibling call it must be 0 because the
1342  // caller will deallocate the entire stack and the callee still expects its
1343  // arguments to begin at SP+0. Completely unused for non-tail calls.
1344  int FPDiff = 0;
1345 
1346  if (IsTailCall && !IsSibCall) {
1347  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1348 
1349  // FPDiff will be negative if this tail call requires more space than we
1350  // would automatically have in our incoming argument space. Positive if we
1351  // can actually shrink the stack.
1352  FPDiff = NumReusableBytes - NumBytes;
1353 
1354  // The stack pointer must be 16-byte aligned at all times it's used for a
1355  // memory operation, which in practice means at *all* times and in
1356  // particular across call boundaries. Therefore our own arguments started at
1357  // a 16-byte aligned SP and the delta applied for the tail call should
1358  // satisfy the same constraint.
1359  assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1360  }
1361 
1362  if (!IsSibCall)
1363  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
1364  dl);
1365 
1366  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
1367  getPointerTy());
1368 
1369  SmallVector<SDValue, 8> MemOpChains;
1371 
1372  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1373  CCValAssign &VA = ArgLocs[i];
1374  ISD::ArgFlagsTy Flags = Outs[i].Flags;
1375  SDValue Arg = OutVals[i];
1376 
1377  // Callee does the actual widening, so all extensions just use an implicit
1378  // definition of the rest of the Loc. Aesthetically, this would be nicer as
1379  // an ANY_EXTEND, but that isn't valid for floating-point types and this
1380  // alternative works on integer types too.
1381  switch (VA.getLocInfo()) {
1382  default: llvm_unreachable("Unknown loc info!");
1383  case CCValAssign::Full: break;
1384  case CCValAssign::SExt:
1385  case CCValAssign::ZExt:
1386  case CCValAssign::AExt: {
1387  unsigned SrcSize = VA.getValVT().getSizeInBits();
1388  unsigned SrcSubReg;
1389 
1390  switch (SrcSize) {
1391  case 8: SrcSubReg = AArch64::sub_8; break;
1392  case 16: SrcSubReg = AArch64::sub_16; break;
1393  case 32: SrcSubReg = AArch64::sub_32; break;
1394  case 64: SrcSubReg = AArch64::sub_64; break;
1395  default: llvm_unreachable("Unexpected argument promotion");
1396  }
1397 
1399  VA.getLocVT(),
1400  DAG.getUNDEF(VA.getLocVT()),
1401  Arg,
1402  DAG.getTargetConstant(SrcSubReg, MVT::i32)),
1403  0);
1404 
1405  break;
1406  }
1407  case CCValAssign::BCvt:
1408  Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1409  break;
1410  }
1411 
1412  if (VA.isRegLoc()) {
1413  // A normal register (sub-) argument. For now we just note it down because
1414  // we want to copy things into registers as late as possible to avoid
1415  // register-pressure (and possibly worse).
1416  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1417  continue;
1418  }
1419 
1420  assert(VA.isMemLoc() && "unexpected argument location");
1421 
1422  SDValue DstAddr;
1423  MachinePointerInfo DstInfo;
1424  if (IsTailCall) {
1425  uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
1426  VA.getLocVT().getSizeInBits();
1427  OpSize = (OpSize + 7) / 8;
1428  int32_t Offset = VA.getLocMemOffset() + FPDiff;
1429  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
1430 
1431  DstAddr = DAG.getFrameIndex(FI, getPointerTy());
1432  DstInfo = MachinePointerInfo::getFixedStack(FI);
1433 
1434  // Make sure any stack arguments overlapping with where we're storing are
1435  // loaded before this eventual operation. Otherwise they'll be clobbered.
1436  Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
1437  } else {
1438  SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
1439 
1440  DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1442  }
1443 
1444  if (Flags.isByVal()) {
1445  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
1446  SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
1447  Flags.getByValAlign(),
1448  /*isVolatile = */ false,
1449  /*alwaysInline = */ false,
1450  DstInfo, MachinePointerInfo(0));
1451  MemOpChains.push_back(Cpy);
1452  } else {
1453  // Normal stack argument, put it where it's needed.
1454  SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
1455  false, false, 0);
1456  MemOpChains.push_back(Store);
1457  }
1458  }
1459 
1460  // The loads and stores generated above shouldn't clash with each
1461  // other. Combining them with this TokenFactor notes that fact for the rest of
1462  // the backend.
1463  if (!MemOpChains.empty())
1464  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1465  &MemOpChains[0], MemOpChains.size());
1466 
1467  // Most of the rest of the instructions need to be glued together; we don't
1468  // want assignments to actual registers used by a call to be rearranged by a
1469  // well-meaning scheduler.
1470  SDValue InFlag;
1471 
1472  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1473  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1474  RegsToPass[i].second, InFlag);
1475  InFlag = Chain.getValue(1);
1476  }
1477 
1478  // The linker is responsible for inserting veneers when necessary to put a
1479  // function call destination in range, so we don't need to bother with a
1480  // wrapper here.
1481  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1482  const GlobalValue *GV = G->getGlobal();
1483  Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
1484  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1485  const char *Sym = S->getSymbol();
1486  Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
1487  }
1488 
1489  // We don't usually want to end the call-sequence here because we would tidy
1490  // the frame up *after* the call; however, in the ABI-changing tail-call case
1491  // we've carefully laid out the parameters so that when sp is reset they'll be
1492  // in the correct location.
1493  if (IsTailCall && !IsSibCall) {
1494  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1495  DAG.getIntPtrConstant(0, true), InFlag, dl);
1496  InFlag = Chain.getValue(1);
1497  }
1498 
1499  // We produce the following DAG scheme for the actual call instruction:
1500  // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
1501  //
1502  // Most arguments aren't going to be used and just keep the values live as
1503  // far as LLVM is concerned. It's expected to be selected as simply "bl
1504  // callee" (for a direct, non-tail call).
1505  std::vector<SDValue> Ops;
1506  Ops.push_back(Chain);
1507  Ops.push_back(Callee);
1508 
1509  if (IsTailCall) {
1510  // Each tail call may have to adjust the stack by a different amount, so
1511  // this information must travel along with the operation for eventual
1512  // consumption by emitEpilogue.
1513  Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
1514  }
1515 
1516  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1517  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1518  RegsToPass[i].second.getValueType()));
1519 
1520 
1521  // Add a register mask operand representing the call-preserved registers. This
1522  // is used later in codegen to constrain register-allocation.
1524  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
1525  assert(Mask && "Missing call preserved mask for calling convention");
1526  Ops.push_back(DAG.getRegisterMask(Mask));
1527 
1528  // If we needed glue, put it in as the last argument.
1529  if (InFlag.getNode())
1530  Ops.push_back(InFlag);
1531 
1532  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1533 
1534  if (IsTailCall) {
1535  return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1536  }
1537 
1538  Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
1539  InFlag = Chain.getValue(1);
1540 
1541  // Now we can reclaim the stack, just as well do it before working out where
1542  // our return value is.
1543  if (!IsSibCall) {
1544  uint64_t CalleePopBytes
1545  = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
1546 
1547  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1548  DAG.getIntPtrConstant(CalleePopBytes, true),
1549  InFlag, dl);
1550  InFlag = Chain.getValue(1);
1551  }
1552 
1553  return LowerCallResult(Chain, InFlag, CallConv,
1554  IsVarArg, Ins, dl, DAG, InVals);
1555 }
1556 
1557 SDValue
1559  CallingConv::ID CallConv, bool IsVarArg,
1561  SDLoc dl, SelectionDAG &DAG,
1562  SmallVectorImpl<SDValue> &InVals) const {
1563  // Assign locations to each value returned by this call.
1565  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
1566  getTargetMachine(), RVLocs, *DAG.getContext());
1567  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
1568 
1569  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1570  CCValAssign VA = RVLocs[i];
1571 
1572  // Return values that are too big to fit into registers should use an sret
1573  // pointer, so this can be a lot simpler than the main argument code.
1574  assert(VA.isRegLoc() && "Memory locations not expected for call return");
1575 
1576  SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1577  InFlag);
1578  Chain = Val.getValue(1);
1579  InFlag = Val.getValue(2);
1580 
1581  switch (VA.getLocInfo()) {
1582  default: llvm_unreachable("Unknown loc info!");
1583  case CCValAssign::Full: break;
1584  case CCValAssign::BCvt:
1585  Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1586  break;
1587  case CCValAssign::ZExt:
1588  case CCValAssign::SExt:
1589  case CCValAssign::AExt:
1590  // Floating-point arguments only get extended/truncated if they're going
1591  // in memory, so using the integer operation is acceptable here.
1592  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1593  break;
1594  }
1595 
1596  InVals.push_back(Val);
1597  }
1598 
1599  return Chain;
1600 }
1601 
1602 bool
1604  CallingConv::ID CalleeCC,
1605  bool IsVarArg,
1606  bool IsCalleeStructRet,
1607  bool IsCallerStructRet,
1608  const SmallVectorImpl<ISD::OutputArg> &Outs,
1609  const SmallVectorImpl<SDValue> &OutVals,
1611  SelectionDAG& DAG) const {
1612 
1613  // For CallingConv::C this function knows whether the ABI needs
1614  // changing. That's not true for other conventions so they will have to opt in
1615  // manually.
1616  if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
1617  return false;
1618 
1619  const MachineFunction &MF = DAG.getMachineFunction();
1620  const Function *CallerF = MF.getFunction();
1621  CallingConv::ID CallerCC = CallerF->getCallingConv();
1622  bool CCMatch = CallerCC == CalleeCC;
1623 
1624  // Byval parameters hand the function a pointer directly into the stack area
1625  // we want to reuse during a tail call. Working around this *is* possible (see
1626  // X86) but less efficient and uglier in LowerCall.
1627  for (Function::const_arg_iterator i = CallerF->arg_begin(),
1628  e = CallerF->arg_end(); i != e; ++i)
1629  if (i->hasByValAttr())
1630  return false;
1631 
1633  if (IsTailCallConvention(CalleeCC) && CCMatch)
1634  return true;
1635  return false;
1636  }
1637 
1638  // Now we search for cases where we can use a tail call without changing the
1639  // ABI. Sibcall is used in some places (particularly gcc) to refer to this
1640  // concept.
1641 
1642  // I want anyone implementing a new calling convention to think long and hard
1643  // about this assert.
1644  assert((!IsVarArg || CalleeCC == CallingConv::C)
1645  && "Unexpected variadic calling convention");
1646 
1647  if (IsVarArg && !Outs.empty()) {
1648  // At least two cases here: if caller is fastcc then we can't have any
1649  // memory arguments (we'd be expected to clean up the stack afterwards). If
1650  // caller is C then we could potentially use its argument area.
1651 
1652  // FIXME: for now we take the most conservative of these in both cases:
1653  // disallow all variadic memory operands.
1655  CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
1656  getTargetMachine(), ArgLocs, *DAG.getContext());
1657 
1658  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
1659  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
1660  if (!ArgLocs[i].isRegLoc())
1661  return false;
1662  }
1663 
1664  // If the calling conventions do not match, then we'd better make sure the
1665  // results are returned in the same way as what the caller expects.
1666  if (!CCMatch) {
1668  CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
1669  getTargetMachine(), RVLocs1, *DAG.getContext());
1670  CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
1671 
1673  CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
1674  getTargetMachine(), RVLocs2, *DAG.getContext());
1675  CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
1676 
1677  if (RVLocs1.size() != RVLocs2.size())
1678  return false;
1679  for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1680  if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1681  return false;
1682  if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1683  return false;
1684  if (RVLocs1[i].isRegLoc()) {
1685  if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1686  return false;
1687  } else {
1688  if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1689  return false;
1690  }
1691  }
1692  }
1693 
1694  // Nothing more to check if the callee is taking no arguments
1695  if (Outs.empty())
1696  return true;
1697 
1699  CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
1700  getTargetMachine(), ArgLocs, *DAG.getContext());
1701 
1702  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
1703 
1704  const AArch64MachineFunctionInfo *FuncInfo
1706 
1707  // If the stack arguments for this call would fit into our own save area then
1708  // the call can be made tail.
1709  return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
1710 }
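A call satisfying all of the checks above can be emitted as a sibling call: same C calling convention, no byval arguments in the caller, and no outgoing stack arguments beyond what the caller's own argument area already provides. A minimal example of the kind of call this is meant to accept (whether the branch really becomes a tail call also depends on optimisation level):

int callee(int x);

int caller(int x) {
  return callee(x + 1);   // eligible to be emitted as "b callee" instead of "bl"
}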
1711 
1713  bool TailCallOpt) const {
1714  return CallCC == CallingConv::Fast && TailCallOpt;
1715 }
1716 
1718  return CallCC == CallingConv::Fast;
1719 }
1720 
1722  SelectionDAG &DAG,
1723  MachineFrameInfo *MFI,
1724  int ClobberedFI) const {
1725  SmallVector<SDValue, 8> ArgChains;
1726  int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
1727  int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
1728 
1729  // Include the original chain at the beginning of the list. When this is
1730  // used by target LowerCall hooks, this helps legalize find the
1731  // CALLSEQ_BEGIN node.
1732  ArgChains.push_back(Chain);
1733 
1734  // Add a chain value for each stack argument corresponding
1735  for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
1736  UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
1737  if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
1738  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
1739  if (FI->getIndex() < 0) {
1740  int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
1741  int64_t InLastByte = InFirstByte;
1742  InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
1743 
1744  if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
1745  (FirstByte <= InFirstByte && InFirstByte <= LastByte))
1746  ArgChains.push_back(SDValue(L, 1));
1747  }
1748 
1749  // Build a tokenfactor for all the chains.
1750  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
1751  &ArgChains[0], ArgChains.size());
1752 }
1753 
1755  switch (CC) {
1756  case ISD::SETEQ: return A64CC::EQ;
1757  case ISD::SETGT: return A64CC::GT;
1758  case ISD::SETGE: return A64CC::GE;
1759  case ISD::SETLT: return A64CC::LT;
1760  case ISD::SETLE: return A64CC::LE;
1761  case ISD::SETNE: return A64CC::NE;
1762  case ISD::SETUGT: return A64CC::HI;
1763  case ISD::SETUGE: return A64CC::HS;
1764  case ISD::SETULT: return A64CC::LO;
1765  case ISD::SETULE: return A64CC::LS;
1766  default: llvm_unreachable("Unexpected condition code");
1767  }
1768 }
1769 
1771  // icmp is implemented using adds/subs immediate, which take an unsigned
1772  // 12-bit immediate, optionally shifted left by 12 bits.
1773 
1774  // Symmetric by using adds/subs
1775  if (Val < 0)
1776  Val = -Val;
1777 
1778  return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
1779 }
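The predicate above accepts exactly the values that fit a 12-bit unsigned immediate, optionally shifted left by 12, with negative values folded by flipping between ADDS and SUBS. A standalone restatement with a few checked samples (not compiler code):

#include <cassert>
#include <cstdint>

static bool legalICmpImm(int64_t Val) {
  if (Val < 0) Val = -Val;                      // symmetric via ADDS/SUBS
  return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
}

int main() {
  assert(legalICmpImm(4095));      // 0xfff: fits the low 12 bits
  assert(legalICmpImm(-4095));     // negated, then as above
  assert(legalICmpImm(4096));      // 0x1000: 1 << 12, shifted form
  assert(legalICmpImm(0xfff000));  // 0xfff << 12
  assert(!legalICmpImm(4097));     // 0x1001: needs bits in both halves
  return 0;
}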
1780 
1782  ISD::CondCode CC, SDValue &A64cc,
1783  SelectionDAG &DAG, SDLoc &dl) const {
1784  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1785  int64_t C = 0;
1786  EVT VT = RHSC->getValueType(0);
1787  bool knownInvalid = false;
1788 
1789  // I'm not convinced the rest of LLVM handles these edge cases properly, but
1790  // we can at least get it right.
1791  if (isSignedIntSetCC(CC)) {
1792  C = RHSC->getSExtValue();
1793  } else if (RHSC->getZExtValue() > INT64_MAX) {
1794  // A 64-bit constant not representable by a signed 64-bit integer is far
1795  // too big to fit into a SUBS immediate anyway.
1796  knownInvalid = true;
1797  } else {
1798  C = RHSC->getZExtValue();
1799  }
1800 
1801  if (!knownInvalid && !isLegalICmpImmediate(C)) {
1802  // Constant does not fit, try adjusting it by one?
1803  switch (CC) {
1804  default: break;
1805  case ISD::SETLT:
1806  case ISD::SETGE:
1807  if (isLegalICmpImmediate(C-1)) {
1808  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1809  RHS = DAG.getConstant(C-1, VT);
1810  }
1811  break;
1812  case ISD::SETULT:
1813  case ISD::SETUGE:
1814  if (isLegalICmpImmediate(C-1)) {
1815  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1816  RHS = DAG.getConstant(C-1, VT);
1817  }
1818  break;
1819  case ISD::SETLE:
1820  case ISD::SETGT:
1821  if (isLegalICmpImmediate(C+1)) {
1822  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1823  RHS = DAG.getConstant(C+1, VT);
1824  }
1825  break;
1826  case ISD::SETULE:
1827  case ISD::SETUGT:
1828  if (isLegalICmpImmediate(C+1)) {
1829  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1830  RHS = DAG.getConstant(C+1, VT);
1831  }
1832  break;
1833  }
1834  }
1835  }
1836 
1838  A64cc = DAG.getConstant(CondCode, MVT::i32);
1839  return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
1840  DAG.getCondCode(CC));
1841 }
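When the constant itself is not encodable, the switch above nudges it by one and flips the strictness of the comparison, which preserves the predicate on integers. A worked instance in plain C++ (illustrative values only): x < 4097 uses the unencodable 0x1001, but the rewritten x <= 4096 uses 1 << 12, which isLegalICmpImmediate accepts, so the compare still lowers to a single SUBS-immediate.

// For every integer x the two forms agree, so the rewrite is sound.
static inline bool ltForm(long x) { return x < 4097; }   // immediate not encodable
static inline bool leForm(long x) { return x <= 4096; }  // 1 << 12: encodable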
1842 
1844  A64CC::CondCodes &Alternative) {
1846  Alternative = A64CC::Invalid;
1847 
1848  switch (CC) {
1849  default: llvm_unreachable("Unknown FP condition!");
1850  case ISD::SETEQ:
1851  case ISD::SETOEQ: CondCode = A64CC::EQ; break;
1852  case ISD::SETGT:
1853  case ISD::SETOGT: CondCode = A64CC::GT; break;
1854  case ISD::SETGE:
1855  case ISD::SETOGE: CondCode = A64CC::GE; break;
1856  case ISD::SETOLT: CondCode = A64CC::MI; break;
1857  case ISD::SETOLE: CondCode = A64CC::LS; break;
1858  case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
1859  case ISD::SETO: CondCode = A64CC::VC; break;
1860  case ISD::SETUO: CondCode = A64CC::VS; break;
1861  case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
1862  case ISD::SETUGT: CondCode = A64CC::HI; break;
1863  case ISD::SETUGE: CondCode = A64CC::PL; break;
1864  case ISD::SETLT:
1865  case ISD::SETULT: CondCode = A64CC::LT; break;
1866  case ISD::SETLE:
1867  case ISD::SETULE: CondCode = A64CC::LE; break;
1868  case ISD::SETNE:
1869  case ISD::SETUNE: CondCode = A64CC::NE; break;
1870  }
1871  return CondCode;
1872 }
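// Illustrative note (added for clarity, not part of the upstream source): the
// Alternative output is how condition codes with no single A64 equivalent get
// handled. For example SETUEQ ("unordered or equal") is satisfied when either
// EQ or VS (unordered) passes, so callers emit a second conditional operation
// whenever Alternative != A64CC::Invalid.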
1873 
1874 SDValue
1875 AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1876  SDLoc DL(Op);
1877  EVT PtrVT = getPointerTy();
1878  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1879 
1880  switch(getTargetMachine().getCodeModel()) {
1881  case CodeModel::Small:
1882  // The most efficient code is PC-relative anyway for the small memory model,
1883  // so we don't need to worry about relocation model.
1884  return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
1885                        DAG.getTargetBlockAddress(BA, PtrVT, 0,
1886                                                  AArch64II::MO_NO_FLAG),
1887                        DAG.getTargetBlockAddress(BA, PtrVT, 0,
1888                                                  AArch64II::MO_LO12),
1889                        DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
1890  case CodeModel::Large:
1891  return DAG.getNode(
1892  AArch64ISD::WrapperLarge, DL, PtrVT,
1893  DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
1894         DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
1895         DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
1896         DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
1897  default:
1898  llvm_unreachable("Only small and large code models supported now");
1899  }
1900 }
1901 
1902 
1903 // (BRCOND chain, val, dest)
1904 SDValue
1905 AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1906  SDLoc dl(Op);
1907  SDValue Chain = Op.getOperand(0);
1908  SDValue TheBit = Op.getOperand(1);
1909  SDValue DestBB = Op.getOperand(2);
1910 
1911  // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
1912  // that as the consumer we are responsible for ignoring rubbish in higher
1913  // bits.
1914  TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
1915  DAG.getConstant(1, MVT::i32));
1916 
1917  SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
1918  DAG.getConstant(0, TheBit.getValueType()),
1919  DAG.getCondCode(ISD::SETNE));
1920 
1921  return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
1922  A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
1923  DestBB);
1924 }
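// Sketch of the intent (added for clarity, not part of the upstream source):
// because only bit 0 of the i1 value is meaningful, the lowering above behaves
// roughly like "if ((val & 1) != 0) goto dest", i.e. a test of bit 0 followed
// by a B.NE to the destination block after selection.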
1925 
1926 // (BR_CC chain, condcode, lhs, rhs, dest)
1927 SDValue
1928 AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
1929  SDLoc dl(Op);
1930  SDValue Chain = Op.getOperand(0);
1931  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
1932  SDValue LHS = Op.getOperand(2);
1933  SDValue RHS = Op.getOperand(3);
1934  SDValue DestBB = Op.getOperand(4);
1935 
1936  if (LHS.getValueType() == MVT::f128) {
1937  // f128 comparisons are lowered to runtime calls by a routine which sets
1938  // LHS, RHS and CC appropriately for the rest of this function to continue.
1939  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
1940 
1941  // If softenSetCCOperands returned a scalar, we need to compare the result
1942  // against zero to select between true and false values.
1943  if (RHS.getNode() == 0) {
1944  RHS = DAG.getConstant(0, LHS.getValueType());
1945  CC = ISD::SETNE;
1946  }
1947  }
1948 
1949  if (LHS.getValueType().isInteger()) {
1950  SDValue A64cc;
1951 
1952  // Integers are handled in a separate function because the combinations of
1953  // immediates and tests can get hairy and we may want to fiddle things.
1954  SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
1955 
1956  return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
1957  Chain, CmpOp, A64cc, DestBB);
1958  }
1959 
1960  // Note that some LLVM floating-point CondCodes can't be lowered to a single
1961  // conditional branch, hence FPCCToA64CC can set a second test, where either
1962  // passing is sufficient.
1963  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
1964  CondCode = FPCCToA64CC(CC, Alternative);
1965  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
1966  SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
1967  DAG.getCondCode(CC));
1968  SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
1969  Chain, SetCC, A64cc, DestBB);
1970 
1971  if (Alternative != A64CC::Invalid) {
1972  A64cc = DAG.getConstant(Alternative, MVT::i32);
1973  A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
1974  A64BR_CC, SetCC, A64cc, DestBB);
1975 
1976  }
1977 
1978  return A64BR_CC;
1979 }
1980 
1981 SDValue
1982 AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
1983                                        RTLIB::Libcall Call) const {
1984  ArgListTy Args;
1985  ArgListEntry Entry;
1986  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
1987  EVT ArgVT = Op.getOperand(i).getValueType();
1988  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
1989  Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
1990  Entry.isSExt = false;
1991  Entry.isZExt = false;
1992  Args.push_back(Entry);
1993  }
1994  SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
1995 
1996  Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
1997 
1998  // By default, the input chain to this libcall is the entry node of the
1999  // function. If the libcall is going to be emitted as a tail call then
2000  // isUsedByReturnOnly will change it to the right chain if the return
2001  // node which is being folded has a non-entry input chain.
2002  SDValue InChain = DAG.getEntryNode();
2003 
2004  // isTailCall may be true since the callee does not reference caller stack
2005  // frame. Check if it's in the right position.
2006  SDValue TCChain = InChain;
2007  bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
2008  if (isTailCall)
2009  InChain = TCChain;
2010 
2012  CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
2013  0, getLibcallCallingConv(Call), isTailCall,
2014  /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
2015  Callee, Args, DAG, SDLoc(Op));
2016  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
2017 
2018  if (!CallInfo.second.getNode())
2019  // It's a tailcall, return the chain (which is the DAG root).
2020  return DAG.getRoot();
2021 
2022  return CallInfo.first;
2023 }
2024 
2025 SDValue
2026 AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
2027  if (Op.getOperand(0).getValueType() != MVT::f128) {
2028  // It's legal except when f128 is involved
2029  return Op;
2030  }
2031 
2032  RTLIB::Libcall LC;
2033  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2034 
2035  SDValue SrcVal = Op.getOperand(0);
2036  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
2037  /*isSigned*/ false, SDLoc(Op)).first;
2038 }
2039 
2040 SDValue
2041 AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
2042  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2043 
2044  RTLIB::Libcall LC;
2045  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2046 
2047  return LowerF128ToCall(Op, DAG, LC);
2048 }
2049 
2050 SDValue
2051 AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2052                                       bool IsSigned) const {
2053  if (Op.getOperand(0).getValueType() != MVT::f128) {
2054  // It's legal except when f128 is involved
2055  return Op;
2056  }
2057 
2058  RTLIB::Libcall LC;
2059  if (IsSigned)
2060    LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2061  else
2062    LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2063 
2064  return LowerF128ToCall(Op, DAG, LC);
2065 }
2066 
2067 SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
2068  MachineFunction &MF = DAG.getMachineFunction();
2069  MachineFrameInfo *MFI = MF.getFrameInfo();
2070  MFI->setReturnAddressIsTaken(true);
2071 
2072  EVT VT = Op.getValueType();
2073  SDLoc dl(Op);
2074  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2075  if (Depth) {
2076  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
2077  SDValue Offset = DAG.getConstant(8, MVT::i64);
2078  return DAG.getLoad(VT, dl, DAG.getEntryNode(),
2079  DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
2080  MachinePointerInfo(), false, false, false, 0);
2081  }
2082 
2083  // Return X30, which contains the return address. Mark it an implicit live-in.
2084  unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64));
2085  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64);
2086 }
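// Illustrative note (added for clarity, not part of the upstream source): for
// Depth > 0 this relies on the AAPCS64 frame record layout, where each frame
// chain entry is a {saved FP, saved LR} pair, so the caller's return address
// lives 8 bytes above the frame pointer produced by LowerFRAMEADDR.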
2087 
2088 
2089 SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG)
2090                                               const {
2091  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2092  MFI->setFrameAddressIsTaken(true);
2093 
2094  EVT VT = Op.getValueType();
2095  SDLoc dl(Op);
2096  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2097  unsigned FrameReg = AArch64::X29;
2098  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2099  while (Depth--)
2100  FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
2101                             MachinePointerInfo(),
2102                             false, false, false, 0);
2103  return FrameAddr;
2104 }
2105 
2106 SDValue
2107 AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
2108                                                   SelectionDAG &DAG) const {
2109  assert(getTargetMachine().getCodeModel() == CodeModel::Large);
2110  assert(getTargetMachine().getRelocationModel() == Reloc::Static);
2111 
2112  EVT PtrVT = getPointerTy();
2113  SDLoc dl(Op);
2114  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
2115  const GlobalValue *GV = GN->getGlobal();
2116 
2117  SDValue GlobalAddr = DAG.getNode(
2118  AArch64ISD::WrapperLarge, dl, PtrVT,
2119  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
2120  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
2121  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
2122  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
2123 
2124  if (GN->getOffset() != 0)
2125  return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
2126  DAG.getConstant(GN->getOffset(), PtrVT));
2127 
2128  return GlobalAddr;
2129 }
2130 
2131 SDValue
2132 AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
2133                                                   SelectionDAG &DAG) const {
2134  assert(getTargetMachine().getCodeModel() == CodeModel::Small);
2135 
2136  EVT PtrVT = getPointerTy();
2137  SDLoc dl(Op);
2138  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
2139  const GlobalValue *GV = GN->getGlobal();
2140  unsigned Alignment = GV->getAlignment();
2141  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
2142  if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
2143  // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
2144  // to zero when they remain undefined. In PIC mode the GOT can take care of
2145  // this, but in absolute mode we use a constant pool load.
2146  SDValue PoolAddr;
2147  PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
2148                            DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
2149                                                      AArch64II::MO_NO_FLAG),
2150                            DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
2151                                                      AArch64II::MO_LO12),
2152                            DAG.getConstant(8, MVT::i32));
2153  SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
2154                                      MachinePointerInfo::getConstantPool(),
2155                                      /*isVolatile=*/ false,
2156  /*isNonTemporal=*/ true,
2157  /*isInvariant=*/ true, 8);
2158  if (GN->getOffset() != 0)
2159  return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
2160  DAG.getConstant(GN->getOffset(), PtrVT));
2161 
2162  return GlobalAddr;
2163  }
2164 
2165  if (Alignment == 0) {
2166  const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
2167  if (GVPtrTy->getElementType()->isSized()) {
2168      Alignment
2169        = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
2170    } else {
2171  // Be conservative if we can't guess, not that it really matters:
2172  // functions and labels aren't valid for loads, and the methods used to
2173  // actually calculate an address work with any alignment.
2174  Alignment = 1;
2175  }
2176  }
2177 
2178  unsigned char HiFixup, LoFixup;
2179  bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM);
2180 
2181  if (UseGOT) {
2182  HiFixup = AArch64II::MO_GOT;
2183  LoFixup = AArch64II::MO_GOT_LO12;
2184  Alignment = 8;
2185  } else {
2186  HiFixup = AArch64II::MO_NO_FLAG;
2187  LoFixup = AArch64II::MO_LO12;
2188  }
2189 
2190  // AArch64's small model demands the following sequence:
2191  // ADRP x0, somewhere
2192  // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
2193  SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
2194  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2195  HiFixup),
2196  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2197  LoFixup),
2198  DAG.getConstant(Alignment, MVT::i32));
2199 
2200  if (UseGOT) {
2201  GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
2202  GlobalRef);
2203  }
2204 
2205  if (GN->getOffset() != 0)
2206  return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
2207  DAG.getConstant(GN->getOffset(), PtrVT));
2208 
2209  return GlobalRef;
2210 }
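// Illustrative note (added for clarity, not part of the upstream source): the
// two WrapperSmall forms above correspond roughly to the usual small-model
// address sequences:
//   adrp x0, sym          ;  add x0, x0, #:lo12:sym        (direct)
//   adrp x0, :got:sym     ;  ldr x0, [x0, #:got_lo12:sym]  (via the GOT)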
2211 
2212 SDValue
2213 AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
2214                                              SelectionDAG &DAG) const {
2215  // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
2216  // we make those distinctions here.
2217 
2218  switch (getTargetMachine().getCodeModel()) {
2219  case CodeModel::Small:
2220  return LowerGlobalAddressELFSmall(Op, DAG);
2221  case CodeModel::Large:
2222  return LowerGlobalAddressELFLarge(Op, DAG);
2223  default:
2224  llvm_unreachable("Only small and large code models supported now");
2225  }
2226 }
2227 
2228 SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
2229                                                 SDValue DescAddr,
2230  SDLoc DL,
2231  SelectionDAG &DAG) const {
2232  EVT PtrVT = getPointerTy();
2233 
2234  // The function we need to call is simply the first entry in the GOT for this
2235  // descriptor, load it in preparation.
2236  SDValue Func, Chain;
2237  Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
2238  DescAddr);
2239 
2240  // The function takes only one argument: the address of the descriptor itself
2241  // in X0.
2242  SDValue Glue;
2243  Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
2244  Glue = Chain.getValue(1);
2245 
2246  // Finally, there's a special calling-convention which means that the lookup
2247  // must preserve all registers (except X0, obviously).
2248  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
2249  const AArch64RegisterInfo *A64RI
2250  = static_cast<const AArch64RegisterInfo *>(TRI);
2251  const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
2252 
2253  // We're now ready to populate the argument list, as with a normal call:
2254  std::vector<SDValue> Ops;
2255  Ops.push_back(Chain);
2256  Ops.push_back(Func);
2257  Ops.push_back(SymAddr);
2258  Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
2259  Ops.push_back(DAG.getRegisterMask(Mask));
2260  Ops.push_back(Glue);
2261 
2262  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2263  Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
2264  Ops.size());
2265  Glue = Chain.getValue(1);
2266 
2267  // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
2268  // back to the generic handling code.
2269  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
2270 }
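// Illustrative note (added for clarity, not part of the upstream source): the
// node built here eventually becomes the standard TLS descriptor call
// sequence, roughly:
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, #:tlsdesc_lo12:var]
//   add  x0, x0, #:tlsdesc_lo12:var
//   .tlsdesccall var
//   blr  x1          ; the offset from TPIDR_EL0 comes back in x0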
2271 
2272 SDValue
2273 AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
2274                                              SelectionDAG &DAG) const {
2275  assert(getSubtarget()->isTargetELF() &&
2276  "TLS not implemented for non-ELF targets");
2277  assert(getTargetMachine().getCodeModel() == CodeModel::Small
2278  && "TLS only supported in small memory model");
2279  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2280 
2281  TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
2282 
2283  SDValue TPOff;
2284  EVT PtrVT = getPointerTy();
2285  SDLoc DL(Op);
2286  const GlobalValue *GV = GA->getGlobal();
2287 
2288  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
2289 
2290  if (Model == TLSModel::InitialExec) {
2291  TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
2292                         DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2293                                                    AArch64II::MO_GOTTPREL),
2294                         DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2295                                                    AArch64II::MO_GOTTPREL_LO12),
2296                         DAG.getConstant(8, MVT::i32));
2297  TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
2298  TPOff);
2299  } else if (Model == TLSModel::LocalExec) {
2300     SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
2301                                                AArch64II::MO_TPREL_G1);
2302     SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
2303                                                AArch64II::MO_TPREL_G0_NC);
2304 
2305  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
2306  DAG.getTargetConstant(1, MVT::i32)), 0);
2307  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
2308  TPOff, LoVar,
2309  DAG.getTargetConstant(0, MVT::i32)), 0);
2310  } else if (Model == TLSModel::GeneralDynamic) {
2311  // Accesses used in this sequence go via the TLS descriptor which lives in
2312  // the GOT. Prepare an address we can use to handle this.
2313     SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2314                                                 AArch64II::MO_TLSDESC);
2315     SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
2316                                                 AArch64II::MO_TLSDESC_LO12);
2317  SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
2318  HiDesc, LoDesc,
2319  DAG.getConstant(8, MVT::i32));
2320  SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
2321 
2322  TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
2323  } else if (Model == TLSModel::LocalDynamic) {
2324  // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
2325  // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
2326  // the beginning of the module's TLS region, followed by a DTPREL offset
2327  // calculation.
2328 
2329  // These accesses will need deduplicating if there's more than one.
2333 
2334 
2335  // Get the location of _TLS_MODULE_BASE_:
2336     SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
2337                                                   AArch64II::MO_TLSDESC);
2338     SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
2339                                                   AArch64II::MO_TLSDESC_LO12);
2340  SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
2341  HiDesc, LoDesc,
2342  DAG.getConstant(8, MVT::i32));
2343  SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
2344 
2345  ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
2346 
2347  // Get the variable's offset from _TLS_MODULE_BASE_
2348     SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
2349                                                AArch64II::MO_DTPREL_G1);
2350     SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
2351                                                AArch64II::MO_DTPREL_G0_NC);
2352 
2353  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
2354  DAG.getTargetConstant(0, MVT::i32)), 0);
2355  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
2356  TPOff, LoVar,
2357  DAG.getTargetConstant(0, MVT::i32)), 0);
2358  } else
2359  llvm_unreachable("Unsupported TLS access model");
2360 
2361 
2362  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
2363 }
2364 
2365 SDValue
2366 AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2367                                       bool IsSigned) const {
2368  if (Op.getValueType() != MVT::f128) {
2369  // Legal for everything except f128.
2370  return Op;
2371  }
2372 
2373  RTLIB::Libcall LC;
2374  if (IsSigned)
2375    LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2376  else
2377    LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2378 
2379  return LowerF128ToCall(Op, DAG, LC);
2380 }
2381 
2382 
2383 SDValue
2384 AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2385  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2386  SDLoc dl(JT);
2387  EVT PtrVT = getPointerTy();
2388 
2389  // When compiling PIC, jump tables get put in the code section so a static
2390  // relocation-style is acceptable for both cases.
2391  switch (getTargetMachine().getCodeModel()) {
2392  case CodeModel::Small:
2393  return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
2394  DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
2395  DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2396                                               AArch64II::MO_LO12),
2397                        DAG.getConstant(1, MVT::i32));
2398  case CodeModel::Large:
2399  return DAG.getNode(
2400  AArch64ISD::WrapperLarge, dl, PtrVT,
2401         DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
2402         DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
2403         DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
2404         DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
2405  default:
2406  llvm_unreachable("Only small and large code models supported now");
2407  }
2408 }
2409 
2410 // (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
2411 SDValue
2412 AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2413  SDLoc dl(Op);
2414  SDValue LHS = Op.getOperand(0);
2415  SDValue RHS = Op.getOperand(1);
2416  SDValue IfTrue = Op.getOperand(2);
2417  SDValue IfFalse = Op.getOperand(3);
2418  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2419 
2420  if (LHS.getValueType() == MVT::f128) {
2421  // f128 comparisons are lowered to libcalls, but slot in nicely here
2422  // afterwards.
2423  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
2424 
2425  // If softenSetCCOperands returned a scalar, we need to compare the result
2426  // against zero to select between true and false values.
2427  if (RHS.getNode() == 0) {
2428  RHS = DAG.getConstant(0, LHS.getValueType());
2429  CC = ISD::SETNE;
2430  }
2431  }
2432 
2433  if (LHS.getValueType().isInteger()) {
2434  SDValue A64cc;
2435 
2436  // Integers are handled in a separate function because the combinations of
2437  // immediates and tests can get hairy and we may want to fiddle things.
2438  SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
2439 
2440  return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
2441  CmpOp, IfTrue, IfFalse, A64cc);
2442  }
2443 
2444  // Note that some LLVM floating-point CondCodes can't be lowered to a single
2445  // conditional branch, hence FPCCToA64CC can set a second test, where either
2446  // passing is sufficient.
2447  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
2448  CondCode = FPCCToA64CC(CC, Alternative);
2449  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
2450  SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
2451  DAG.getCondCode(CC));
2452  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
2453  Op.getValueType(),
2454  SetCC, IfTrue, IfFalse, A64cc);
2455 
2456  if (Alternative != A64CC::Invalid) {
2457  A64cc = DAG.getConstant(Alternative, MVT::i32);
2458  A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
2459  SetCC, IfTrue, A64SELECT_CC, A64cc);
2460 
2461  }
2462 
2463  return A64SELECT_CC;
2464 }
2465 
2466 // (SELECT testbit, iftrue, iffalse)
2467 SDValue
2468 AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2469  SDLoc dl(Op);
2470  SDValue TheBit = Op.getOperand(0);
2471  SDValue IfTrue = Op.getOperand(1);
2472  SDValue IfFalse = Op.getOperand(2);
2473 
2474  // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
2475  // that as the consumer we are responsible for ignoring rubbish in higher
2476  // bits.
2477  TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
2478  DAG.getConstant(1, MVT::i32));
2479  SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
2480  DAG.getConstant(0, TheBit.getValueType()),
2481  DAG.getCondCode(ISD::SETNE));
2482 
2483  return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
2484  A64CMP, IfTrue, IfFalse,
2485                      DAG.getConstant(A64CC::NE, MVT::i32));
2486 }
2487 
2488 static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
2489  SDLoc DL(Op);
2490  SDValue LHS = Op.getOperand(0);
2491  SDValue RHS = Op.getOperand(1);
2492  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2493  EVT VT = Op.getValueType();
2494  bool Invert = false;
2495  SDValue Op0, Op1;
2496  unsigned Opcode;
2497 
2498  if (LHS.getValueType().isInteger()) {
2499 
2500  // Attempt to use Vector Integer Compare Mask Test instruction.
2501  // TST = icmp ne (and (op0, op1), zero).
2502  if (CC == ISD::SETNE) {
2503  if (((LHS.getOpcode() == ISD::AND) &&
2504        ISD::isBuildVectorAllZeros(RHS.getNode())) ||
2505       ((RHS.getOpcode() == ISD::AND) &&
2506        ISD::isBuildVectorAllZeros(LHS.getNode()))) {
2507 
2508  SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
2509  SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
2510  SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
2511  return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
2512  }
2513  }
2514 
2515  // Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
2516  // Note: Compare against Zero does not support unsigned predicates.
2517  if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
2518       ISD::isBuildVectorAllZeros(LHS.getNode())) &&
2519      !isUnsignedIntSetCC(CC)) {
2520 
2521  // If LHS is the zero value, swap operands and CondCode.
2522  if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
2523  CC = getSetCCSwappedOperands(CC);
2524  Op0 = RHS;
2525  } else
2526  Op0 = LHS;
2527 
2528  // Ensure valid CondCode for Compare Mask against Zero instruction:
2529  // EQ, GE, GT, LE, LT.
2530  if (ISD::SETNE == CC) {
2531  Invert = true;
2532  CC = ISD::SETEQ;
2533  }
2534 
2535  // Using constant type to differentiate integer and FP compares with zero.
2536  Op1 = DAG.getConstant(0, MVT::i32);
2537  Opcode = AArch64ISD::NEON_CMPZ;
2538 
2539  } else {
2540  // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
2541  // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
2542  bool Swap = false;
2543  switch (CC) {
2544  default:
2545  llvm_unreachable("Illegal integer comparison.");
2546  case ISD::SETEQ:
2547  case ISD::SETGT:
2548  case ISD::SETGE:
2549  case ISD::SETUGT:
2550  case ISD::SETUGE:
2551  break;
2552  case ISD::SETNE:
2553  Invert = true;
2554  CC = ISD::SETEQ;
2555  break;
2556  case ISD::SETULT:
2557  case ISD::SETULE:
2558  case ISD::SETLT:
2559  case ISD::SETLE:
2560  Swap = true;
2561  CC = getSetCCSwappedOperands(CC);
2562  }
2563 
2564  if (Swap)
2565  std::swap(LHS, RHS);
2566 
2567  Opcode = AArch64ISD::NEON_CMP;
2568  Op0 = LHS;
2569  Op1 = RHS;
2570  }
2571 
2572  // Generate Compare Mask instr or Compare Mask against Zero instr.
2573  SDValue NeonCmp =
2574  DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
2575 
2576  if (Invert)
2577  NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
2578 
2579  return NeonCmp;
2580  }
2581 
2582  // Now handle Floating Point cases.
2583  // Attempt to use Vector Floating Point Compare Mask against Zero instruction.
2584  if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
2585      ISD::isBuildVectorAllZeros(LHS.getNode())) {
2586 
2587  // If LHS is the zero value, swap operands and CondCode.
2588  if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
2589  CC = getSetCCSwappedOperands(CC);
2590  Op0 = RHS;
2591  } else
2592  Op0 = LHS;
2593 
2594  // Using constant type to differentiate integer and FP compares with zero.
2595  Op1 = DAG.getConstantFP(0, MVT::f32);
2596  Opcode = AArch64ISD::NEON_CMPZ;
2597  } else {
2598  // Attempt to use Vector Floating Point Compare Mask instruction.
2599  Op0 = LHS;
2600  Op1 = RHS;
2601  Opcode = AArch64ISD::NEON_CMP;
2602  }
2603 
2604  SDValue NeonCmpAlt;
2605  // Some register compares have to be implemented with swapped CC and operands,
2606  // e.g.: OLT implemented as OGT with swapped operands.
2607  bool SwapIfRegArgs = false;
2608 
2609  // Ensure valid CondCode for FP Compare Mask against Zero instruction:
2610  // EQ, GE, GT, LE, LT.
2611  // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
2612  switch (CC) {
2613  default:
2614  llvm_unreachable("Illegal FP comparison");
2615  case ISD::SETUNE:
2616  case ISD::SETNE:
2617  Invert = true; // Fallthrough
2618  case ISD::SETOEQ:
2619  case ISD::SETEQ:
2620  CC = ISD::SETEQ;
2621  break;
2622  case ISD::SETOLT:
2623  case ISD::SETLT:
2624  CC = ISD::SETLT;
2625  SwapIfRegArgs = true;
2626  break;
2627  case ISD::SETOGT:
2628  case ISD::SETGT:
2629  CC = ISD::SETGT;
2630  break;
2631  case ISD::SETOLE:
2632  case ISD::SETLE:
2633  CC = ISD::SETLE;
2634  SwapIfRegArgs = true;
2635  break;
2636  case ISD::SETOGE:
2637  case ISD::SETGE:
2638  CC = ISD::SETGE;
2639  break;
2640  case ISD::SETUGE:
2641  Invert = true;
2642  CC = ISD::SETLT;
2643  SwapIfRegArgs = true;
2644  break;
2645  case ISD::SETULE:
2646  Invert = true;
2647  CC = ISD::SETGT;
2648  break;
2649  case ISD::SETUGT:
2650  Invert = true;
2651  CC = ISD::SETLE;
2652  SwapIfRegArgs = true;
2653  break;
2654  case ISD::SETULT:
2655  Invert = true;
2656  CC = ISD::SETGE;
2657  break;
2658  case ISD::SETUEQ:
2659  Invert = true; // Fallthrough
2660  case ISD::SETONE:
2661  // Expand this to (OGT | OLT).
2662  NeonCmpAlt =
2663  DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
2664  CC = ISD::SETLT;
2665  SwapIfRegArgs = true;
2666  break;
2667  case ISD::SETUO:
2668  Invert = true; // Fallthrough
2669  case ISD::SETO:
2670  // Expand this to (OGE | OLT).
2671  NeonCmpAlt =
2672  DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
2673  CC = ISD::SETLT;
2674  SwapIfRegArgs = true;
2675  break;
2676  }
2677 
2678  if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
2679  CC = getSetCCSwappedOperands(CC);
2680  std::swap(Op0, Op1);
2681  }
2682 
2683  // Generate FP Compare Mask instr or FP Compare Mask against Zero instr
2684  SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
2685 
2686  if (NeonCmpAlt.getNode())
2687  NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
2688 
2689  if (Invert)
2690  NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
2691 
2692  return NeonCmp;
2693 }
2694 
2695 // (SETCC lhs, rhs, condcode)
2696 SDValue
2697 AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2698  SDLoc dl(Op);
2699  SDValue LHS = Op.getOperand(0);
2700  SDValue RHS = Op.getOperand(1);
2701  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2702  EVT VT = Op.getValueType();
2703 
2704  if (VT.isVector())
2705  return LowerVectorSETCC(Op, DAG);
2706 
2707  if (LHS.getValueType() == MVT::f128) {
2708  // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
2709  // for the rest of the function (some i32 or i64 values).
2710  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
2711 
2712  // If softenSetCCOperands returned a scalar, use it.
2713  if (RHS.getNode() == 0) {
2714  assert(LHS.getValueType() == Op.getValueType() &&
2715  "Unexpected setcc expansion!");
2716  return LHS;
2717  }
2718  }
2719 
2720  if (LHS.getValueType().isInteger()) {
2721  SDValue A64cc;
2722 
2723  // Integers are handled in a separate function because the combinations of
2724  // immediates and tests can get hairy and we may want to fiddle things.
2725  SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
2726 
2727  return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
2728  CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
2729  A64cc);
2730  }
2731 
2732  // Note that some LLVM floating-point CondCodes can't be lowered to a single
2733  // conditional branch, hence FPCCToA64CC can set a second test, where either
2734  // passing is sufficient.
2735  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
2736  CondCode = FPCCToA64CC(CC, Alternative);
2737  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
2738  SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
2739  DAG.getCondCode(CC));
2740  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
2741  CmpOp, DAG.getConstant(1, VT),
2742  DAG.getConstant(0, VT), A64cc);
2743 
2744  if (Alternative != A64CC::Invalid) {
2745  A64cc = DAG.getConstant(Alternative, MVT::i32);
2746  A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
2747  DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
2748  }
2749 
2750  return A64SELECT_CC;
2751 }
2752 
2753 SDValue
2754 AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
2755  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
2756  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
2757 
2758  // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
2759  // rather than just 8.
2760  return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
2761  Op.getOperand(1), Op.getOperand(2),
2762  DAG.getConstant(32, MVT::i32), 8, false, false,
2763  MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
2764 }
2765 
2766 SDValue
2767 AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2768  // The layout of the va_list struct is specified in the AArch64 Procedure Call
2769  // Standard, section B.3.
2770  MachineFunction &MF = DAG.getMachineFunction();
2771  AArch64MachineFunctionInfo *FuncInfo
2772    = MF.getInfo<AArch64MachineFunctionInfo>();
2773  SDLoc DL(Op);
2774 
2775  SDValue Chain = Op.getOperand(0);
2776  SDValue VAList = Op.getOperand(1);
2777  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2778  SmallVector<SDValue, 4> MemOps;
2779 
2780  // void *__stack at offset 0
2781  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
2782  getPointerTy());
2783  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
2784  MachinePointerInfo(SV), false, false, 0));
2785 
2786  // void *__gr_top at offset 8
2787  int GPRSize = FuncInfo->getVariadicGPRSize();
2788  if (GPRSize > 0) {
2789  SDValue GRTop, GRTopAddr;
2790 
2791  GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
2792  DAG.getConstant(8, getPointerTy()));
2793 
2794  GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
2795  GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
2796  DAG.getConstant(GPRSize, getPointerTy()));
2797 
2798  MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
2799  MachinePointerInfo(SV, 8),
2800  false, false, 0));
2801  }
2802 
2803  // void *__vr_top at offset 16
2804  int FPRSize = FuncInfo->getVariadicFPRSize();
2805  if (FPRSize > 0) {
2806  SDValue VRTop, VRTopAddr;
2807  VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
2808  DAG.getConstant(16, getPointerTy()));
2809 
2810  VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
2811  VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
2812  DAG.getConstant(FPRSize, getPointerTy()));
2813 
2814  MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
2815  MachinePointerInfo(SV, 16),
2816  false, false, 0));
2817  }
2818 
2819  // int __gr_offs at offset 24
2820  SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
2821  DAG.getConstant(24, getPointerTy()));
2822  MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
2823  GROffsAddr, MachinePointerInfo(SV, 24),
2824  false, false, 0));
2825 
2826  // int __vr_offs at offset 28
2827  SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
2828  DAG.getConstant(28, getPointerTy()));
2829  MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
2830  VROffsAddr, MachinePointerInfo(SV, 28),
2831  false, false, 0));
2832 
2833  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
2834  MemOps.size());
2835 }
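// For reference (added for clarity, not part of the upstream source), the
// stores above fill in the AAPCS64 va_list, which behaves like:
//   struct __va_list {
//     void *__stack;   // offset 0
//     void *__gr_top;  // offset 8
//     void *__vr_top;  // offset 16
//     int   __gr_offs; // offset 24
//     int   __vr_offs; // offset 28
//   };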
2836 
2837 SDValue
2838 AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2839  switch (Op.getOpcode()) {
2840  default: llvm_unreachable("Don't know how to custom lower this!");
2841  case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
2842  case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
2843  case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
2844  case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
2845  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
2846  case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
2847  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
2848  case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
2849  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
2850  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
2851  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
2852  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
2853 
2854  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
2855  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
2856  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
2857  case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
2858  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
2859  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
2860  case ISD::SELECT: return LowerSELECT(Op, DAG);
2861  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
2862  case ISD::SETCC: return LowerSETCC(Op, DAG);
2863  case ISD::VACOPY: return LowerVACOPY(Op, DAG);
2864  case ISD::VASTART: return LowerVASTART(Op, DAG);
2865  case ISD::BUILD_VECTOR:
2866  return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
2867  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
2868  }
2869 
2870  return SDValue();
2871 }
2872 
2873 /// Check if the specified splat value corresponds to a valid vector constant
2874 /// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
2875 /// so, return the encoded 8-bit immediate and the OpCmode instruction fields
2876 /// values.
2877 static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
2878  unsigned SplatBitSize, SelectionDAG &DAG,
2879  bool is128Bits, NeonModImmType type, EVT &VT,
2880  unsigned &Imm, unsigned &OpCmode) {
2881  switch (SplatBitSize) {
2882  default:
2883  llvm_unreachable("unexpected size for isNeonModifiedImm");
2884  case 8: {
2885  if (type != Neon_Mov_Imm)
2886  return false;
2887  assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
2888  // Neon movi per byte: Op=0, Cmode=1110.
2889  OpCmode = 0xe;
2890  Imm = SplatBits;
2891  VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
2892  break;
2893  }
2894  case 16: {
2895  // Neon move inst per halfword
2896  VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
2897  if ((SplatBits & ~0xff) == 0) {
2898  // Value = 0x00nn is 0x00nn LSL 0
2899  // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
2900  // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
2901  // Op=x, Cmode=100y
2902  Imm = SplatBits;
2903  OpCmode = 0x8;
2904  break;
2905  }
2906  if ((SplatBits & ~0xff00) == 0) {
2907  // Value = 0xnn00 is 0x00nn LSL 8
2908  // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
2909  // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
2910  // Op=x, Cmode=101x
2911  Imm = SplatBits >> 8;
2912  OpCmode = 0xa;
2913  break;
2914  }
2915  // can't handle any other
2916  return false;
2917  }
2918 
2919  case 32: {
2920  // First the LSL variants (MSL is unusable by some interested instructions).
2921 
2922  // Neon move instr per word, shift zeros
2923  VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
2924  if ((SplatBits & ~0xff) == 0) {
2925  // Value = 0x000000nn is 0x000000nn LSL 0
2926  // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
2927  // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
2928  // Op=x, Cmode=000x
2929  Imm = SplatBits;
2930  OpCmode = 0;
2931  break;
2932  }
2933  if ((SplatBits & ~0xff00) == 0) {
2934  // Value = 0x0000nn00 is 0x000000nn LSL 8
2935  // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
2936  // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
2937  // Op=x, Cmode=001x
2938  Imm = SplatBits >> 8;
2939  OpCmode = 0x2;
2940  break;
2941  }
2942  if ((SplatBits & ~0xff0000) == 0) {
2943  // Value = 0x00nn0000 is 0x000000nn LSL 16
2944  // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
2945  // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
2946  // Op=x, Cmode=010x
2947  Imm = SplatBits >> 16;
2948  OpCmode = 0x4;
2949  break;
2950  }
2951  if ((SplatBits & ~0xff000000) == 0) {
2952  // Value = 0xnn000000 is 0x000000nn LSL 24
2953  // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
2954  // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
2955  // Op=x, Cmode=011x
2956  Imm = SplatBits >> 24;
2957  OpCmode = 0x6;
2958  break;
2959  }
2960 
2961  // Now the MSL immediates.
2962 
2963  // Neon move instr per word, shift ones
2964  if ((SplatBits & ~0xffff) == 0 &&
2965  ((SplatBits | SplatUndef) & 0xff) == 0xff) {
2966  // Value = 0x0000nnff is 0x000000nn MSL 8
2967  // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
2968  // Op=x, Cmode=1100
2969  Imm = SplatBits >> 8;
2970  OpCmode = 0xc;
2971  break;
2972  }
2973  if ((SplatBits & ~0xffffff) == 0 &&
2974  ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
2975  // Value = 0x00nnffff is 0x000000nn MSL 16
2976  // movi: Op=1, Cmode= 1101; mvni: Op=1, Cmode= 1101
2977  // Op=x, Cmode=1101
2978  Imm = SplatBits >> 16;
2979  OpCmode = 0xd;
2980  break;
2981  }
2982  // can't handle any other
2983  return false;
2984  }
2985 
2986  case 64: {
2987  if (type != Neon_Mov_Imm)
2988  return false;
2989  // Neon move instr bytemask, where each byte is either 0x00 or 0xff.
2990  // movi Op=1, Cmode=1110.
2991  OpCmode = 0x1e;
2992  uint64_t BitMask = 0xff;
2993  uint64_t Val = 0;
2994  unsigned ImmMask = 1;
2995  Imm = 0;
2996  for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
2997  if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
2998  Val |= BitMask;
2999  Imm |= ImmMask;
3000  } else if ((SplatBits & BitMask) != 0) {
3001  return false;
3002  }
3003  BitMask <<= 8;
3004  ImmMask <<= 1;
3005  }
3006  SplatBits = Val;
3007  VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
3008  break;
3009  }
3010  }
3011 
3012  return true;
3013 }
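// Worked example (added for clarity, not part of the upstream source): a v4i32
// splat of 0x00450000 matches the "0x000000nn LSL 16" pattern above, so the
// helper reports Imm = 0x45 and OpCmode = 0x4, i.e. the fields needed for a
// "movi Vd.4s, #0x45, lsl #16" style encoding.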
3014 
3015 static SDValue PerformANDCombine(SDNode *N,
3016                                  TargetLowering::DAGCombinerInfo &DCI) {
3017 
3018  SelectionDAG &DAG = DCI.DAG;
3019  SDLoc DL(N);
3020  EVT VT = N->getValueType(0);
3021 
3022  // We're looking for an SRL/AND pair which forms an unsigned bitfield
3023  // extract (UBFX).
3023 
3024  if (VT != MVT::i32 && VT != MVT::i64)
3025  return SDValue();
3026 
3027  if (!isa<ConstantSDNode>(N->getOperand(1)))
3028  return SDValue();
3029 
3030  uint64_t TruncMask = N->getConstantOperandVal(1);
3031  if (!isMask_64(TruncMask))
3032  return SDValue();
3033 
3034  uint64_t Width = CountPopulation_64(TruncMask);
3035  SDValue Shift = N->getOperand(0);
3036 
3037  if (Shift.getOpcode() != ISD::SRL)
3038  return SDValue();
3039 
3040  if (!isa<ConstantSDNode>(Shift->getOperand(1)))
3041  return SDValue();
3042  uint64_t LSB = Shift->getConstantOperandVal(1);
3043 
3044  if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
3045  return SDValue();
3046 
3047  return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
3048  DAG.getConstant(LSB, MVT::i64),
3049  DAG.getConstant(LSB + Width - 1, MVT::i64));
3050 }
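// Worked example (added for clarity, not part of the upstream source): for
// (i32 (and (srl x, 7), 0xff)) the mask gives Width = 8 and the shift gives
// LSB = 7, so the combine above produces UBFX x, lsb=7, msb=14, i.e. an
// unsigned extract of bits [14:7].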
3051 
3052 /// For a true bitfield insert, the bits getting into that contiguous mask
3053 /// should come from the low part of an existing value: they must be formed from
3054 /// a compatible SHL operation (unless they're already low). This function
3055 /// checks that condition and returns the least-significant bit that's
3056 /// intended. If the operation is not a field preparation, -1 is returned.
3057 static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT,
3058  SDValue &MaskedVal, uint64_t Mask) {
3059  if (!isShiftedMask_64(Mask))
3060  return -1;
3061 
3062  // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
3063  // instruction. BFI will do a left-shift by LSB before applying the mask we've
3064  // spotted, so in general we should pre-emptively "undo" that by making sure
3065  // the incoming bits have had a right-shift applied to them.
3066  //
3067  // This right shift, however, will combine with existing left/right shifts. In
3068  // the simplest case of a completely straight bitfield operation, it will be
3069  // expected to completely cancel out with an existing SHL. More complicated
3070  // cases (e.g. bitfield to bitfield copy) may still need a real shift before
3071  // the BFI.
3072 
3073  uint64_t LSB = countTrailingZeros(Mask);
3074  int64_t ShiftRightRequired = LSB;
3075  if (MaskedVal.getOpcode() == ISD::SHL &&
3076  isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
3077  ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
3078  MaskedVal = MaskedVal.getOperand(0);
3079  } else if (MaskedVal.getOpcode() == ISD::SRL &&
3080  isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
3081  ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
3082  MaskedVal = MaskedVal.getOperand(0);
3083  }
3084 
3085  if (ShiftRightRequired > 0)
3086  MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
3087  DAG.getConstant(ShiftRightRequired, MVT::i64));
3088  else if (ShiftRightRequired < 0) {
3089  // We could actually end up with a residual left shift, for example with
3090  // "struc.bitfield = val << 1".
3091  MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
3092  DAG.getConstant(-ShiftRightRequired, MVT::i64));
3093  }
3094 
3095  return LSB;
3096 }
3097 
3098 /// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
3099 /// a mask and an extension. Returns true if a BFI was found and provides
3100 /// information on its surroundings.
3101 static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
3102  bool &Extended) {
3103  Extended = false;
3104  if (N.getOpcode() == ISD::ZERO_EXTEND) {
3105  Extended = true;
3106  N = N.getOperand(0);
3107  }
3108 
3109  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3110  Mask = N->getConstantOperandVal(1);
3111  N = N.getOperand(0);
3112  } else {
3113  // Mask is the whole width.
3114  Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
3115  }
3116 
3117  if (N.getOpcode() == AArch64ISD::BFI) {
3118  BFI = N;
3119  return true;
3120  }
3121 
3122  return false;
3123 }
3124 
3125 /// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
3126 /// is roughly equivalent to (and (BFI ...), mask). This form is used because it
3127 /// can often be further combined with a larger mask. Ultimately, we want mask
3128 /// to be 2^32-1 or 2^64-1 so the AND can be skipped.
3129 static SDValue tryCombineToBFI(SDNode *N,
3130                                TargetLowering::DAGCombinerInfo &DCI,
3131                                const AArch64Subtarget *Subtarget) {
3132  SelectionDAG &DAG = DCI.DAG;
3133  SDLoc DL(N);
3134  EVT VT = N->getValueType(0);
3135 
3136  assert(N->getOpcode() == ISD::OR && "Unexpected root");
3137 
3138  // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
3139  // abandon the effort.
3140  SDValue LHS = N->getOperand(0);
3141  if (LHS.getOpcode() != ISD::AND)
3142  return SDValue();
3143 
3144  uint64_t LHSMask;
3145  if (isa<ConstantSDNode>(LHS.getOperand(1)))
3146  LHSMask = LHS->getConstantOperandVal(1);
3147  else
3148  return SDValue();
3149 
3150  // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
3151  // is or abandon the effort.
3152  SDValue RHS = N->getOperand(1);
3153  if (RHS.getOpcode() != ISD::AND)
3154  return SDValue();
3155 
3156  uint64_t RHSMask;
3157  if (isa<ConstantSDNode>(RHS.getOperand(1)))
3158  RHSMask = RHS->getConstantOperandVal(1);
3159  else
3160  return SDValue();
3161 
3162  // Can't do anything if the masks are incompatible.
3163  if (LHSMask & RHSMask)
3164  return SDValue();
3165 
3166  // Now we need one of the masks to be a contiguous field. Without loss of
3167  // generality that should be the RHS one.
3168  SDValue Bitfield = LHS.getOperand(0);
3169  if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
3170  // We know that LHS is a candidate new value, and RHS isn't already a better
3171  // one.
3172  std::swap(LHS, RHS);
3173  std::swap(LHSMask, RHSMask);
3174  }
3175 
3176  // We've done our best to put the right operands in the right places, all we
3177  // can do now is check whether a BFI exists.
3178  Bitfield = RHS.getOperand(0);
3179  int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
3180  if (LSB == -1)
3181  return SDValue();
3182 
3183  uint32_t Width = CountPopulation_64(RHSMask);
3184  assert(Width && "Expected non-zero bitfield width");
3185 
3186  SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
3187  LHS.getOperand(0), Bitfield,
3188  DAG.getConstant(LSB, MVT::i64),
3189  DAG.getConstant(Width, MVT::i64));
3190 
3191  // Mask is trivial
3192  if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
3193  return BFI;
3194 
3195  return DAG.getNode(ISD::AND, DL, VT, BFI,
3196  DAG.getConstant(LHSMask | RHSMask, VT));
3197 }
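// Worked example (added for clarity, not part of the upstream source): for
// (i32 (or (and x, 0xffffff0f), (and (shl y, 4), 0xf0))) the RHS mask 0xf0 is
// the contiguous field (LSB = 4, Width = 4) and the two masks together cover
// all 32 bits, so this folds to (BFI x, y, #4, #4) with no residual AND.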
3198 
3199 /// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
3200 /// original input. This is surprisingly common because SROA splits things up
3201 /// into i8 chunks, so the originally detected MaskedBFI may actually only act
3202 /// on the low (say) byte of a word. This is then orred into the rest of the
3203 /// word afterwards.
3204 ///
3205 /// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
3206 ///
3207 /// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
3208 /// MaskedBFI. We can also deal with a certain amount of extend/truncate being
3209 /// involved.
3210 static SDValue tryCombineToLargerBFI(SDNode *N,
3211                                      TargetLowering::DAGCombinerInfo &DCI,
3212                                      const AArch64Subtarget *Subtarget) {
3213  SelectionDAG &DAG = DCI.DAG;
3214  SDLoc DL(N);
3215  EVT VT = N->getValueType(0);
3216 
3217  // First job is to hunt for a MaskedBFI on either the left or right. Swap
3218  // operands if it's actually on the right.
3219  SDValue BFI;
3220  SDValue PossExtraMask;
3221  uint64_t ExistingMask = 0;
3222  bool Extended = false;
3223  if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
3224  PossExtraMask = N->getOperand(1);
3225  else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
3226  PossExtraMask = N->getOperand(0);
3227  else
3228  return SDValue();
3229 
3230  // We can only combine a BFI with another compatible mask.
3231  if (PossExtraMask.getOpcode() != ISD::AND ||
3232  !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
3233  return SDValue();
3234 
3235  uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
3236 
3237  // Masks must be compatible.
3238  if (ExtraMask & ExistingMask)
3239  return SDValue();
3240 
3241  SDValue OldBFIVal = BFI.getOperand(0);
3242  SDValue NewBFIVal = BFI.getOperand(1);
3243  if (Extended) {
3244  // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
3245  // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
3246  // need to be made compatible.
3247  assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
3248  && "Invalid types for BFI");
3249  OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
3250  NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
3251  }
3252 
3253  // We need the MaskedBFI to be combined with a mask of the *same* value.
3254  if (PossExtraMask.getOperand(0) != OldBFIVal)
3255  return SDValue();
3256 
3257  BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
3258  OldBFIVal, NewBFIVal,
3259  BFI.getOperand(2), BFI.getOperand(3));
3260 
3261  // If the masking is trivial, we don't need to create it.
3262  if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
3263  return BFI;
3264 
3265  return DAG.getNode(ISD::AND, DL, VT, BFI,
3266  DAG.getConstant(ExtraMask | ExistingMask, VT));
3267 }
3268 
3269 /// An EXTR instruction is made up of two shifts, ORed together. This helper
3270 /// searches for and classifies those shifts.
3271 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
3272  bool &FromHi) {
3273  if (N.getOpcode() == ISD::SHL)
3274  FromHi = false;
3275  else if (N.getOpcode() == ISD::SRL)
3276  FromHi = true;
3277  else
3278  return false;
3279 
3280  if (!isa<ConstantSDNode>(N.getOperand(1)))
3281  return false;
3282 
3283  ShiftAmount = N->getConstantOperandVal(1);
3284  Src = N->getOperand(0);
3285  return true;
3286 }
3287 
3288 /// EXTR instruction extracts a contiguous chunk of bits from two existing
3289 /// registers viewed as a high/low pair. This function looks for the pattern:
3290 /// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
3291 /// EXTR. Can't quite be done in TableGen because the two immediates aren't
3292 /// independent.
3293 static SDValue tryCombineToEXTR(SDNode *N,
3294                                 TargetLowering::DAGCombinerInfo &DCI) {
3295  SelectionDAG &DAG = DCI.DAG;
3296  SDLoc DL(N);
3297  EVT VT = N->getValueType(0);
3298 
3299  assert(N->getOpcode() == ISD::OR && "Unexpected root");
3300 
3301  if (VT != MVT::i32 && VT != MVT::i64)
3302  return SDValue();
3303 
3304  SDValue LHS;
3305  uint32_t ShiftLHS = 0;
3306  bool LHSFromHi = 0;
3307  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
3308  return SDValue();
3309 
3310  SDValue RHS;
3311  uint32_t ShiftRHS = 0;
3312  bool RHSFromHi = 0;
3313  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
3314  return SDValue();
3315 
3316  // If they're both trying to come from the high part of the register, they're
3317  // not really an EXTR.
3318  if (LHSFromHi == RHSFromHi)
3319  return SDValue();
3320 
3321  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
3322  return SDValue();
3323 
3324  if (LHSFromHi) {
3325  std::swap(LHS, RHS);
3326  std::swap(ShiftLHS, ShiftRHS);
3327  }
3328 
3329  return DAG.getNode(AArch64ISD::EXTR, DL, VT,
3330  LHS, RHS,
3331  DAG.getConstant(ShiftRHS, MVT::i64));
3332 }
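// Worked example (added for clarity, not part of the upstream source): for
// (i32 (or (shl x, 8), (srl y, 24))) the shift amounts sum to the register
// width, so this becomes EXTR x, y, #24, which concatenates x:y and extracts
// 32 bits starting at bit 24 of the pair, i.e. (x << 8) | (y >> 24).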
3333 
3334 /// Target-specific dag combine xforms for ISD::OR
3335 static SDValue PerformORCombine(SDNode *N,
3336                                 TargetLowering::DAGCombinerInfo &DCI,
3337                                 const AArch64Subtarget *Subtarget) {
3338 
3339  SelectionDAG &DAG = DCI.DAG;
3340  SDLoc DL(N);
3341  EVT VT = N->getValueType(0);
3342 
3343  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3344  return SDValue();
3345 
3346  // Attempt to recognise bitfield-insert operations.
3347  SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
3348  if (Res.getNode())
3349  return Res;
3350 
3351  // Attempt to combine an existing MaskedBFI operation into one with a larger
3352  // mask.
3353  Res = tryCombineToLargerBFI(N, DCI, Subtarget);
3354  if (Res.getNode())
3355  return Res;
3356 
3357  Res = tryCombineToEXTR(N, DCI);
3358  if (Res.getNode())
3359  return Res;
3360 
3361  if (!Subtarget->hasNEON())
3362  return SDValue();
3363 
3364  // Attempt to use vector immediate-form BSL
3365  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
3366 
3367  SDValue N0 = N->getOperand(0);
3368  if (N0.getOpcode() != ISD::AND)
3369  return SDValue();
3370 
3371  SDValue N1 = N->getOperand(1);
3372  if (N1.getOpcode() != ISD::AND)
3373  return SDValue();
3374 
3375  if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
3376  APInt SplatUndef;
3377  unsigned SplatBitSize;
3378  bool HasAnyUndefs;
3379    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
3380    APInt SplatBits0;
3381  if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
3382  HasAnyUndefs) &&
3383  !HasAnyUndefs) {
3384      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
3385      APInt SplatBits1;
3386  if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
3387  HasAnyUndefs) &&
3388  !HasAnyUndefs && SplatBits0 == ~SplatBits1) {
3389  // Canonicalize the vector type to make instruction selection simpler.
3390  EVT CanonicalVT = VT.is128BitVector() ? MVT::v16i8 : MVT::v8i8;
3391  SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT,
3392  N0->getOperand(1), N0->getOperand(0),
3393  N1->getOperand(0));
3394  return DAG.getNode(ISD::BITCAST, DL, VT, Result);
3395  }
3396  }
3397  }
3398 
3399  return SDValue();
3400 }
3401 
3402 /// Target-specific dag combine xforms for ISD::SRA
3403 static SDValue PerformSRACombine(SDNode *N,
3404                                  TargetLowering::DAGCombinerInfo &DCI) {
3405 
3406  SelectionDAG &DAG = DCI.DAG;
3407  SDLoc DL(N);
3408  EVT VT = N->getValueType(0);
3409 
3410  // We're looking for an SRA/SHL pair which form an SBFX.
3411 
3412  if (VT != MVT::i32 && VT != MVT::i64)
3413  return SDValue();
3414 
3415  if (!isa<ConstantSDNode>(N->getOperand(1)))
3416  return SDValue();
3417 
3418  uint64_t ExtraSignBits = N->getConstantOperandVal(1);
3419  SDValue Shift = N->getOperand(0);
3420 
3421  if (Shift.getOpcode() != ISD::SHL)
3422  return SDValue();
3423 
3424  if (!isa<ConstantSDNode>(Shift->getOperand(1)))
3425  return SDValue();
3426 
3427  uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
3428  uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
3429  uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
3430 
3431  if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
3432  return SDValue();
3433 
3434  return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
3435  DAG.getConstant(LSB, MVT::i64),
3436  DAG.getConstant(LSB + Width - 1, MVT::i64));
3437 }
3438 
3439 /// Check if this is a valid build_vector for the immediate operand of
3440 /// a vector shift operation, where all the elements of the build_vector
3441 /// must have the same constant integer value.
3442 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
3443  // Ignore bit_converts.
3444  while (Op.getOpcode() == ISD::BITCAST)
3445  Op = Op.getOperand(0);
3446  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
3447  APInt SplatBits, SplatUndef;
3448  unsigned SplatBitSize;
3449  bool HasAnyUndefs;
3450  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
3451  HasAnyUndefs, ElementBits) ||
3452  SplatBitSize > ElementBits)
3453  return false;
3454  Cnt = SplatBits.getSExtValue();
3455  return true;
3456 }
3457 
3458 /// Check if this is a valid build_vector for the immediate operand of
3459 /// a vector shift left operation. That value must be in the range:
3460 /// 0 <= Value < ElementBits
3461 static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
3462  assert(VT.isVector() && "vector shift count is not a vector type");
3463  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
3464  if (!getVShiftImm(Op, ElementBits, Cnt))
3465  return false;
3466  return (Cnt >= 0 && Cnt < ElementBits);
3467 }
3468 
3469 /// Check if this is a valid build_vector for the immediate operand of a
3470 /// vector shift right operation. The value must be in the range:
3471 /// 1 <= Value <= ElementBits
3472 static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
3473  assert(VT.isVector() && "vector shift count is not a vector type");
3474  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
3475  if (!getVShiftImm(Op, ElementBits, Cnt))
3476  return false;
3477  return (Cnt >= 1 && Cnt <= ElementBits);
3478 }
3479 
3480 /// Checks for immediate versions of vector shifts and lowers them.
3481 static SDValue PerformShiftCombine(SDNode *N,
3482                                    TargetLowering::DAGCombinerInfo &DCI,
3483                                    const AArch64Subtarget *ST) {
3484  SelectionDAG &DAG = DCI.DAG;
3485  EVT VT = N->getValueType(0);
3486  if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
3487  return PerformSRACombine(N, DCI);
3488 
3489  // Nothing to be done for scalar shifts.
3490  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3491  if (!VT.isVector() || !TLI.isTypeLegal(VT))
3492  return SDValue();
3493 
3494  assert(ST->hasNEON() && "unexpected vector shift");
3495  int64_t Cnt;
3496 
3497  switch (N->getOpcode()) {
3498  default:
3499  llvm_unreachable("unexpected shift opcode");
3500 
3501  case ISD::SHL:
3502  if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
3503  SDValue RHS =
3505  DAG.getConstant(Cnt, MVT::i32));
3506  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
3507  }
3508  break;
3509 
3510  case ISD::SRA:
3511  case ISD::SRL:
3512  if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
3513  SDValue RHS =
3515  DAG.getConstant(Cnt, MVT::i32));
3516  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
3517  }
3518  break;
3519  }
3520 
3521  return SDValue();
3522 }
3523 
3524 /// AArch64-specific DAG combining for Neon intrinsics.
3525 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
3526  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3527 
3528  switch (IntNo) {
3529  default:
3530  // Don't do anything for most intrinsics.
3531  break;
3532 
3533  case Intrinsic::arm_neon_vqshifts:
3534  case Intrinsic::arm_neon_vqshiftu: {
3535  EVT VT = N->getOperand(1).getValueType();
3536  int64_t Cnt;
3537  if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
3538  break;
3539  unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
3542  return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
3543  N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
3544  }
3545 
3546  return SDValue();
3547 }
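// Example (a sketch, not from the original source): a call to
// @llvm.arm.neon.vqshifts.v4i32 whose shift-amount operand is a constant splat
// accepted by isVShiftLImm is rewritten into the target saturating-shift node
// carrying the count as a plain i32 operand, so it can be selected as a
// SQSHL-by-immediate instead of the register form.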
3548 
3549 /// Target-specific DAG combine function for NEON load/store intrinsics
3550 /// to merge base address updates.
3551 static SDValue CombineBaseUpdate(SDNode *N,
3552                                  TargetLowering::DAGCombinerInfo &DCI) {
3553  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
3554  return SDValue();
3555 
3556  SelectionDAG &DAG = DCI.DAG;
3557  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
3558                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
3559  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
3560  SDValue Addr = N->getOperand(AddrOpIdx);
3561 
3562  // Search for a use of the address operand that is an increment.
3563  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
3564  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
3565  SDNode *User = *UI;
3566  if (User->getOpcode() != ISD::ADD ||
3567  UI.getUse().getResNo() != Addr.getResNo())
3568  continue;
3569 
3570  // Check that the add is independent of the load/store. Otherwise, folding
3571  // it would create a cycle.
3572  if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
3573  continue;
3574 
3575  // Find the new opcode for the updating load/store.
3576  bool isLoad = true;
3577  bool isLaneOp = false;
3578  unsigned NewOpc = 0;
3579  unsigned NumVecs = 0;
3580  if (isIntrinsic) {
3581  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3582  switch (IntNo) {
3583  default: llvm_unreachable("unexpected intrinsic for Neon base update");
3585  NumVecs = 1; break;
3587  NumVecs = 2; break;
3589  NumVecs = 3; break;
3591  NumVecs = 4; break;
3593  NumVecs = 1; isLoad = false; break;
3595  NumVecs = 2; isLoad = false; break;
3597  NumVecs = 3; isLoad = false; break;
3599  NumVecs = 4; isLoad = false; break;
3601  NumVecs = 2; break;
3603  NumVecs = 3; break;
3605  NumVecs = 4; break;
3607  NumVecs = 2; isLoad = false; break;
3609  NumVecs = 3; isLoad = false; break;
3611  NumVecs = 4; isLoad = false; break;
3613  NumVecs = 2; isLaneOp = true; break;
3615  NumVecs = 3; isLaneOp = true; break;
3617  NumVecs = 4; isLaneOp = true; break;
3619  NumVecs = 2; isLoad = false; isLaneOp = true; break;
3621  NumVecs = 3; isLoad = false; isLaneOp = true; break;
3623  NumVecs = 4; isLoad = false; isLaneOp = true; break;
3624  }
3625  } else {
3626  isLaneOp = true;
3627  switch (N->getOpcode()) {
3628  default: llvm_unreachable("unexpected opcode for Neon base update");
3630  NumVecs = 2; break;
3632  NumVecs = 3; break;
3634  NumVecs = 4; break;
3635  }
3636  }
3637 
3638  // Find the size of memory referenced by the load/store.
3639  EVT VecTy;
3640  if (isLoad)
3641  VecTy = N->getValueType(0);
3642  else
3643  VecTy = N->getOperand(AddrOpIdx + 1).getValueType();
3644  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
3645  if (isLaneOp)
3646  NumBytes /= VecTy.getVectorNumElements();
3647 
3648  // If the increment is a constant, it must match the memory ref size.
3649  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
3650  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
3651  uint32_t IncVal = CInc->getZExtValue();
3652  if (IncVal != NumBytes)
3653  continue;
3654  Inc = DAG.getTargetConstant(IncVal, MVT::i32);
3655  }
3656 
3657  // Create the new updating load/store node.
3658  EVT Tys[6];
3659  unsigned NumResultVecs = (isLoad ? NumVecs : 0);
3660  unsigned n;
3661  for (n = 0; n < NumResultVecs; ++n)
3662  Tys[n] = VecTy;
3663  Tys[n++] = MVT::i64;
3664  Tys[n] = MVT::Other;
3665  SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs + 2);
3666  SmallVector<SDValue, 8> Ops;
3667  Ops.push_back(N->getOperand(0)); // incoming chain
3668  Ops.push_back(N->getOperand(AddrOpIdx));
3669  Ops.push_back(Inc);
3670  for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
3671  Ops.push_back(N->getOperand(i));
3672  }
3673  MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
3674  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
3675  Ops.data(), Ops.size(),
3676  MemInt->getMemoryVT(),
3677  MemInt->getMemOperand());
3678 
3679  // Update the uses.
3680  std::vector<SDValue> NewResults;
3681  for (unsigned i = 0; i < NumResultVecs; ++i) {
3682  NewResults.push_back(SDValue(UpdN.getNode(), i));
3683  }
3684  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
3685  DCI.CombineTo(N, NewResults);
3686  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
3687 
3688  break;
3689  }
3690  return SDValue();
3691 }
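// Worked example of the base-update combine (illustrative only, not from the
// original source): if the address passed to @llvm.arm.neon.vld1.v4i32 is also
// consumed by an ADD of 16 (1 vector * 128 bits / 8 bytes), the load and the
// increment are merged into a single post-indexed node, selectable as
// "ld1 {v0.4s}, [x0], #16"; a constant increment that does not equal the
// memory size is skipped.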
3692 
3693 /// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1)
3694 /// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs.
3695 /// If so, combine them to a vldN-dup operation and return true.
3696 static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3697  SelectionDAG &DAG = DCI.DAG;
3698  EVT VT = N->getValueType(0);
3699 
3700  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
3701  SDNode *VLD = N->getOperand(0).getNode();
3702  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
3703  return SDValue();
3704  unsigned NumVecs = 0;
3705  unsigned NewOpc = 0;
3706  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
3707  if (IntNo == Intrinsic::arm_neon_vld2lane) {
3708  NumVecs = 2;
3709  NewOpc = AArch64ISD::NEON_LD2DUP;
3710  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
3711  NumVecs = 3;
3712  NewOpc = AArch64ISD::NEON_LD3DUP;
3713  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
3714  NumVecs = 4;
3715  NewOpc = AArch64ISD::NEON_LD4DUP;
3716  } else {
3717  return SDValue();
3718  }
3719 
3720  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
3721  // numbers match the load.
3722  unsigned VLDLaneNo =
3723  cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue();
3724  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
3725  UI != UE; ++UI) {
3726  // Ignore uses of the chain result.
3727  if (UI.getUse().getResNo() == NumVecs)
3728  continue;
3729  SDNode *User = *UI;
3730  if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE ||
3731  VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
3732  return SDValue();
3733  }
3734 
3735  // Create the vldN-dup node.
3736  EVT Tys[5];
3737  unsigned n;
3738  for (n = 0; n < NumVecs; ++n)
3739  Tys[n] = VT;
3740  Tys[n] = MVT::Other;
3741  SDVTList SDTys = DAG.getVTList(Tys, NumVecs + 1);
3742  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
3743  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
3744  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2,
3745  VLDMemInt->getMemoryVT(),
3746  VLDMemInt->getMemOperand());
3747 
3748  // Update the uses.
3749  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
3750  UI != UE; ++UI) {
3751  unsigned ResNo = UI.getUse().getResNo();
3752  // Ignore uses of the chain result.
3753  if (ResNo == NumVecs)
3754  continue;
3755  SDNode *User = *UI;
3756  DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
3757  }
3758 
3759  // Now the vldN-lane intrinsic is dead except for its chain result.
3760  // Update uses of the chain.
3761  std::vector<SDValue> VLDDupResults;
3762  for (unsigned n = 0; n < NumVecs; ++n)
3763  VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
3764  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
3765  DCI.CombineTo(VLD, VLDDupResults);
3766 
3767  return SDValue(N, 0);
3768 }
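// Illustration (a sketch, not from the original source): when every non-chain
// result of a vld2lane intrinsic is only used by NEON_VDUPLANE nodes of the
// loaded lane, the lane load plus the duplicates collapse into one
// NEON_LD2DUP node, roughly "ld2r {v0.4s, v1.4s}, [x0]" instead of a lane
// load followed by two dup instructions.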
3769 
3770 SDValue
3771 AArch64TargetLowering::PerformDAGCombine(SDNode *N,
3772                                          DAGCombinerInfo &DCI) const {
3773  switch (N->getOpcode()) {
3774  default: break;
3775  case ISD::AND: return PerformANDCombine(N, DCI);
3776  case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
3777  case ISD::SHL:
3778  case ISD::SRA:
3779  case ISD::SRL:
3780  return PerformShiftCombine(N, DCI, getSubtarget());
3781  case ISD::INTRINSIC_WO_CHAIN:
3782  return PerformIntrinsicCombine(N, DCI.DAG);
3783  case AArch64ISD::NEON_VDUPLANE:
3784  return CombineVLDDUP(N, DCI);
3785  case AArch64ISD::NEON_LD2DUP:
3786  case AArch64ISD::NEON_LD3DUP:
3787  case AArch64ISD::NEON_LD4DUP:
3788  return CombineBaseUpdate(N, DCI);
3789  case ISD::INTRINSIC_VOID:
3790  case ISD::INTRINSIC_W_CHAIN:
3791  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
3812  return CombineBaseUpdate(N, DCI);
3813  default:
3814  break;
3815  }
3816  }
3817  return SDValue();
3818 }
3819 
3820 bool
3821 AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
3822  VT = VT.getScalarType();
3823 
3824  if (!VT.isSimple())
3825  return false;
3826 
3827  switch (VT.getSimpleVT().SimpleTy) {
3828  case MVT::f16:
3829  case MVT::f32:
3830  case MVT::f64:
3831  return true;
3832  case MVT::f128:
3833  return false;
3834  default:
3835  break;
3836  }
3837 
3838  return false;
3839 }
3840 
3841 // Check whether a BUILD_VECTOR can be represented as a vector shuffle. If it
3842 // can, try to call LowerVECTOR_SHUFFLE to lower it.
3843 bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
3844                                                  SDValue &Res) const {
3845  SDLoc DL(Op);
3846  EVT VT = Op.getValueType();
3847  unsigned NumElts = VT.getVectorNumElements();
3848  unsigned V0NumElts = 0;
3849  int Mask[16];
3850  SDValue V0, V1;
3851 
3852  // Check that every element is extracted from at most two source vectors.
3853  for (unsigned i = 0; i < NumElts; ++i) {
3854  SDValue Elt = Op.getOperand(i);
3855  if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3856  return false;
3857 
3858  if (V0.getNode() == 0) {
3859  V0 = Elt.getOperand(0);
3860  V0NumElts = V0.getValueType().getVectorNumElements();
3861  }
3862  if (Elt.getOperand(0) == V0) {
3863  Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue());
3864  continue;
3865  } else if (V1.getNode() == 0) {
3866  V1 = Elt.getOperand(0);
3867  }
3868  if (Elt.getOperand(0) == V1) {
3869  unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue();
3870  Mask[i] = (Lane + V0NumElts);
3871  continue;
3872  } else {
3873  return false;
3874  }
3875  }
3876 
3877  if (!V1.getNode() && V0NumElts == NumElts * 2) {
3878  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
3879  DAG.getConstant(NumElts, MVT::i64));
3880  V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
3881  DAG.getConstant(0, MVT::i64));
3882  V0NumElts = V0.getValueType().getVectorNumElements();
3883  }
3884 
3885  if (V1.getNode() && NumElts == V0NumElts &&
3886  V0NumElts == V1.getValueType().getVectorNumElements()) {
3887  SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
3888  Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
3889  return true;
3890  } else
3891  return false;
3892 }
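// Example (illustrative only, not from the original source): a v4i32
// BUILD_VECTOR of extract_vector_elt(%a, 0), (%a, 2), (%b, 1), (%b, 3) draws
// from exactly two source vectors, so it is rebuilt here as
// vector_shuffle<0, 2, 5, 7> %a, %b and handed to LowerVECTOR_SHUFFLE.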
3893 
3894 // If this is a case we can't handle, return null and let the default
3895 // expansion code take care of it.
3896 SDValue
3897 AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3898                                          const AArch64Subtarget *ST) const {
3899 
3900  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
3901  SDLoc DL(Op);
3902  EVT VT = Op.getValueType();
3903 
3904  APInt SplatBits, SplatUndef;
3905  unsigned SplatBitSize;
3906  bool HasAnyUndefs;
3907 
3908  unsigned UseNeonMov = VT.getSizeInBits() >= 64;
3909 
3910  // Note we favor lowering MOVI over MVNI.
3911  // This has implications for the TableGen patterns that select BIC immediate
3912  // instructions but not ORR immediate instructions.
3913  // If this lowering order is changed, TableGen patterns for BIC immediate and
3914  // ORR immediate instructions have to be updated.
3915  if (UseNeonMov &&
3916  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
3917  if (SplatBitSize <= 64) {
3918  // First attempt to use vector immediate-form MOVI
3919  EVT NeonMovVT;
3920  unsigned Imm = 0;
3921  unsigned OpCmode = 0;
3922 
3923  if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
3924  SplatBitSize, DAG, VT.is128BitVector(),
3925  Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
3926  SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
3927  SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
3928 
3929  if (ImmVal.getNode() && OpCmodeVal.getNode()) {
3930  SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
3931  ImmVal, OpCmodeVal);
3932  return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
3933  }
3934  }
3935 
3936  // Then attempt to use vector immediate-form MVNI
3937  uint64_t NegatedImm = (~SplatBits).getZExtValue();
3938  if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
3939  DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
3940  Imm, OpCmode)) {
3941  SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
3942  SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
3943  if (ImmVal.getNode() && OpCmodeVal.getNode()) {
3944  SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
3945  ImmVal, OpCmodeVal);
3946  return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
3947  }
3948  }
3949 
3950  // Attempt to use vector immediate-form FMOV
3951  if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
3952  (VT == MVT::v2f64 && SplatBitSize == 64)) {
3953  APFloat RealVal(
3954  SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
3955  SplatBits);
3956  uint32_t ImmVal;
3957  if (A64Imms::isFPImm(RealVal, ImmVal)) {
3958  SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
3959  return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
3960  }
3961  }
3962  }
3963  }
3964 
3965  unsigned NumElts = VT.getVectorNumElements();
3966  bool isOnlyLowElement = true;
3967  bool usesOnlyOneValue = true;
3968  bool hasDominantValue = false;
3969  bool isConstant = true;
3970 
3971  // Map of the number of times a particular SDValue appears in the
3972  // element list.
3973  DenseMap<SDValue, unsigned> ValueCounts;
3974  SDValue Value;
3975  for (unsigned i = 0; i < NumElts; ++i) {
3976  SDValue V = Op.getOperand(i);
3977  if (V.getOpcode() == ISD::UNDEF)
3978  continue;
3979  if (i > 0)
3980  isOnlyLowElement = false;
3981  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3982  isConstant = false;
3983 
3984  ValueCounts.insert(std::make_pair(V, 0));
3985  unsigned &Count = ValueCounts[V];
3986 
3987  // Is this value dominant? (takes up more than half of the lanes)
3988  if (++Count > (NumElts / 2)) {
3989  hasDominantValue = true;
3990  Value = V;
3991  }
3992  }
3993  if (ValueCounts.size() != 1)
3994  usesOnlyOneValue = false;
3995  if (!Value.getNode() && ValueCounts.size() > 0)
3996  Value = ValueCounts.begin()->first;
3997 
3998  if (ValueCounts.size() == 0)
3999  return DAG.getUNDEF(VT);
4000 
4001  // A load is better lowered with INSERT_VECTOR_ELT, so only take the
4002  // SCALAR_TO_VECTOR shortcut below for non-load values.
4003  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
4004  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4005 
4006  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4007  // Use VDUP for non-constant splats.
4008  if (hasDominantValue && EltSize <= 64) {
4009  if (!isConstant) {
4010  SDValue N;
4011 
4012  // If we are DUPing a value that comes directly from a vector, we could
4013  // just use DUPLANE. We can only do this if the lane being extracted
4014  // is at a constant index, as the DUP from lane instructions only have
4015  // constant-index forms.
4016  if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4017  isa<ConstantSDNode>(Value->getOperand(1))) {
4018  N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
4019  Value->getOperand(0), Value->getOperand(1));
4020  } else
4021  N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
4022 
4023  if (!usesOnlyOneValue) {
4024  // The dominant value was splatted as 'N', but we now have to insert
4025  // all differing elements.
4026  for (unsigned I = 0; I < NumElts; ++I) {
4027  if (Op.getOperand(I) == Value)
4028  continue;
4029  SmallVector<SDValue, 3> Ops;
4030  Ops.push_back(N);
4031  Ops.push_back(Op.getOperand(I));
4032  Ops.push_back(DAG.getConstant(I, MVT::i64));
4033  N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
4034  }
4035  }
4036  return N;
4037  }
4038  if (usesOnlyOneValue && isConstant) {
4039  return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
4040  }
4041  }
4042  // If all elements are constants and the case above didn't get hit, fall back
4043  // to the default expansion, which will generate a load from the constant
4044  // pool.
4045  if (isConstant)
4046  return SDValue();
4047 
4048  // Try to lower this by treating it as a vector shuffle.
4049  SDValue Shuf;
4050  if (isKnownShuffleVector(Op, DAG, Shuf))
4051  return Shuf;
4052 
4053  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
4054  // know the default expansion would otherwise fall back on something even
4055  // worse. For a vector with one or two non-undef values, that's
4056  // scalar_to_vector for the elements followed by a shuffle (provided the
4057  // shuffle is valid for the target) and materialization element by element
4058  // on the stack followed by a load for everything else.
4059  if (!isConstant && !usesOnlyOneValue) {
4060  SDValue Vec = DAG.getUNDEF(VT);
4061  for (unsigned i = 0 ; i < NumElts; ++i) {
4062  SDValue V = Op.getOperand(i);
4063  if (V.getOpcode() == ISD::UNDEF)
4064  continue;
4065  SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
4066  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
4067  }
4068  return Vec;
4069  }
4070  return SDValue();
4071 }
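// Example of the dominant-value path above (a sketch, not from the original
// source): for the non-constant vector <%x, %x, %x, %y> the value %x occupies
// more than half of the lanes, so the result is built as NEON_VDUP(%x)
// followed by a single INSERT_VECTOR_ELT of %y into lane 3, instead of four
// scalar inserts or a constant-pool load.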
4072 
4073 /// isREVMask - Check if a vector shuffle corresponds to a REV
4074 /// instruction with the specified blocksize. (The order of the elements
4075 /// within each block of the vector is reversed.)
4076 static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
4077  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
4078  "Only possible block sizes for REV are: 16, 32, 64");
4079 
4080  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
4081  if (EltSz == 64)
4082  return false;
4083 
4084  unsigned NumElts = VT.getVectorNumElements();
4085  unsigned BlockElts = M[0] + 1;
4086  // If the first shuffle index is UNDEF, be optimistic.
4087  if (M[0] < 0)
4088  BlockElts = BlockSize / EltSz;
4089 
4090  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
4091  return false;
4092 
4093  for (unsigned i = 0; i < NumElts; ++i) {
4094  if (M[i] < 0)
4095  continue; // ignore UNDEF indices
4096  if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
4097  return false;
4098  }
4099 
4100  return true;
4101 }
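// Illustration (not from the original source): for v8i16 with BlockSize 64
// the expected mask is <3,2,1,0,7,6,5,4> (each 64-bit block reversed, REV64),
// while BlockSize 32 on the same type expects <1,0,3,2,5,4,7,6> (REV32).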
4102 
4103 // isPermuteMask - Check whether the vector shuffle matches one of the UZP,
4104 // ZIP or TRN instructions.
4105 static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) {
4106  unsigned NumElts = VT.getVectorNumElements();
4107  if (NumElts < 4)
4108  return 0;
4109 
4110  bool ismatch = true;
4111 
4112  // Check UZP1
4113  for (unsigned i = 0; i < NumElts; ++i) {
4114  if ((unsigned)M[i] != i * 2) {
4115  ismatch = false;
4116  break;
4117  }
4118  }
4119  if (ismatch)
4120  return AArch64ISD::NEON_UZP1;
4121 
4122  // Check UZP2
4123  ismatch = true;
4124  for (unsigned i = 0; i < NumElts; ++i) {
4125  if ((unsigned)M[i] != i * 2 + 1) {
4126  ismatch = false;
4127  break;
4128  }
4129  }
4130  if (ismatch)
4131  return AArch64ISD::NEON_UZP2;
4132 
4133  // Check ZIP1
4134  ismatch = true;
4135  for (unsigned i = 0; i < NumElts; ++i) {
4136  if ((unsigned)M[i] != i / 2 + NumElts * (i % 2)) {
4137  ismatch = false;
4138  break;
4139  }
4140  }
4141  if (ismatch)
4142  return AArch64ISD::NEON_ZIP1;
4143 
4144  // Check ZIP2
4145  ismatch = true;
4146  for (unsigned i = 0; i < NumElts; ++i) {
4147  if ((unsigned)M[i] != (NumElts + i) / 2 + NumElts * (i % 2)) {
4148  ismatch = false;
4149  break;
4150  }
4151  }
4152  if (ismatch)
4153  return AArch64ISD::NEON_ZIP2;
4154 
4155  // Check TRN1
4156  ismatch = true;
4157  for (unsigned i = 0; i < NumElts; ++i) {
4158  if ((unsigned)M[i] != i + (NumElts - 1) * (i % 2)) {
4159  ismatch = false;
4160  break;
4161  }
4162  }
4163  if (ismatch)
4164  return AArch64ISD::NEON_TRN1;
4165 
4166  // Check TRN2
4167  ismatch = true;
4168  for (unsigned i = 0; i < NumElts; ++i) {
4169  if ((unsigned)M[i] != 1 + i + (NumElts - 1) * (i % 2)) {
4170  ismatch = false;
4171  break;
4172  }
4173  }
4174  if (ismatch)
4175  return AArch64ISD::NEON_TRN2;
4176 
4177  return 0;
4178 }
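// Reference masks for a v4i32 shuffle, derived from the checks above
// (illustrative only):
//   UZP1 <0,2,4,6>   UZP2 <1,3,5,7>
//   ZIP1 <0,4,1,5>   ZIP2 <2,6,3,7>
//   TRN1 <0,4,2,6>   TRN2 <1,5,3,7>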
4179 
4180 SDValue
4181 AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
4182                                            SelectionDAG &DAG) const {
4183  SDValue V1 = Op.getOperand(0);
4184  SDValue V2 = Op.getOperand(1);
4185  SDLoc dl(Op);
4186  EVT VT = Op.getValueType();
4187  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4188 
4189  // Convert shuffles that are directly supported on NEON to target-specific
4190  // DAG nodes, instead of keeping them as shuffles and matching them again
4191  // during code selection. This is more efficient and avoids the possibility
4192  // of inconsistencies between legalization and selection.
4193  ArrayRef<int> ShuffleMask = SVN->getMask();
4194 
4195  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4196  if (EltSize > 64)
4197  return SDValue();
4198 
4199  if (isREVMask(ShuffleMask, VT, 64))
4200  return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
4201  if (isREVMask(ShuffleMask, VT, 32))
4202  return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
4203  if (isREVMask(ShuffleMask, VT, 16))
4204  return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
4205 
4206  unsigned ISDNo = isPermuteMask(ShuffleMask, VT);
4207  if (ISDNo)
4208  return DAG.getNode(ISDNo, dl, VT, V1, V2);
4209 
4210  // If the elements of the shuffle mask are all the same constant, we can
4211  // transform it into either NEON_VDUP or NEON_VDUPLANE.
4212  if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
4213  int Lane = SVN->getSplatIndex();
4214  // If this is undef splat, generate it via "just" vdup, if possible.
4215  if (Lane == -1) Lane = 0;
4216 
4217  // Test if V1 is a SCALAR_TO_VECTOR.
4218  if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4219  return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
4220  }
4221  // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
4222  if (V1.getOpcode() == ISD::BUILD_VECTOR) {
4223  bool IsScalarToVector = true;
4224  for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
4225  if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
4226  i != (unsigned)Lane) {
4227  IsScalarToVector = false;
4228  break;
4229  }
4230  if (IsScalarToVector)
4231  return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
4232  V1.getOperand(Lane));
4233  }
4234  return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
4235  DAG.getConstant(Lane, MVT::i64));
4236  }
4237 
4238  int Length = ShuffleMask.size();
4239  int V1EltNum = V1.getValueType().getVectorNumElements();
4240 
4241  // If the number of V1 elements is the same as the number of shuffle mask
4242  // elements and the mask values are sequential, we can transform the shuffle
4243  // into NEON_VEXTRACT.
4244  if (V1EltNum == Length) {
4245  // Check if the shuffle mask is sequential.
4246  bool IsSequential = true;
4247  int CurMask = ShuffleMask[0];
4248  for (int I = 0; I < Length; ++I) {
4249  if (ShuffleMask[I] != CurMask) {
4250  IsSequential = false;
4251  break;
4252  }
4253  CurMask++;
4254  }
4255  if (IsSequential) {
4256  assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
4257  unsigned VecSize = EltSize * V1EltNum;
4258  unsigned Index = (EltSize/8) * ShuffleMask[0];
4259  if (VecSize == 64 || VecSize == 128)
4260  return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
4261  DAG.getConstant(Index, MVT::i64));
4262  }
4263  }
4264 
4265  // For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
4266  // insert by element from V2 into V1.
4267  // If the shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 is a better
4268  // insertion target than V1 because it needs fewer inserts, so we count the
4269  // elements that would have to be inserted into each of V1 and V2 and pick
4270  // whichever needs fewer.
4271 
4272  // Collect elements need to be inserted and their index.
4273  SmallVector<int, 8> NV1Elt;
4274  SmallVector<int, 8> N1Index;
4275  SmallVector<int, 8> NV2Elt;
4276  SmallVector<int, 8> N2Index;
4277  for (int I = 0; I != Length; ++I) {
4278  if (ShuffleMask[I] != I) {
4279  NV1Elt.push_back(ShuffleMask[I]);
4280  N1Index.push_back(I);
4281  }
4282  }
4283  for (int I = 0; I != Length; ++I) {
4284  if (ShuffleMask[I] != (I + V1EltNum)) {
4285  NV2Elt.push_back(ShuffleMask[I]);
4286  N2Index.push_back(I);
4287  }
4288  }
4289 
4290  // Decide which vector to use as the insertion base. If every lane mismatches
4291  // for both V1 and V2, start from an UNDEF vector instead.
4292  SDValue InsV = V1;
4293  SmallVector<int, 8> InsMasks = NV1Elt;
4294  SmallVector<int, 8> InsIndex = N1Index;
4295  if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
4296  if (NV1Elt.size() > NV2Elt.size()) {
4297  InsV = V2;
4298  InsMasks = NV2Elt;
4299  InsIndex = N2Index;
4300  }
4301  } else {
4302  InsV = DAG.getNode(ISD::UNDEF, dl, VT);
4303  }
4304 
4305  for (int I = 0, E = InsMasks.size(); I != E; ++I) {
4306  SDValue ExtV = V1;
4307  int Mask = InsMasks[I];
4308  if (Mask >= V1EltNum) {
4309  ExtV = V2;
4310  Mask -= V1EltNum;
4311  }
4312  // Any value type smaller than i32 is illegal in AArch64, and this lowering
4313  // function runs after the legalize pass, so the extracted element type must
4314  // itself be legal here.
4315  EVT EltVT;
4316  if (VT.getVectorElementType().isFloatingPoint())
4317  EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
4318  else
4319  EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
4320 
4321  if (Mask >= 0) {
4322  ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
4323  DAG.getConstant(Mask, MVT::i64));
4324  InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
4325  DAG.getConstant(InsIndex[I], MVT::i64));
4326  }
4327  }
4328  return InsV;
4329 }
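// Worked example for the sequential-mask path above (a sketch, not from the
// original source): a v4i32 shuffle of V1, V2 with mask <2,3,4,5> is
// sequential starting at 2, so it becomes NEON_VEXTRACT(V1, V2, #8), where
// 8 = (32/8) * 2 bytes, selectable as "ext v0.16b, v1.16b, v2.16b, #8".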
4330 
4331 AArch64TargetLowering::ConstraintType
4332 AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
4333  if (Constraint.size() == 1) {
4334  switch (Constraint[0]) {
4335  default: break;
4336  case 'w': // An FP/SIMD vector register
4337  return C_RegisterClass;
4338  case 'I': // Constant that can be used with an ADD instruction
4339  case 'J': // Constant that can be used with a SUB instruction
4340  case 'K': // Constant that can be used with a 32-bit logical instruction
4341  case 'L': // Constant that can be used with a 64-bit logical instruction
4342  case 'M': // Constant that can be used as a 32-bit MOV immediate
4343  case 'N': // Constant that can be used as a 64-bit MOV immediate
4344  case 'Y': // Floating point constant zero
4345  case 'Z': // Integer constant zero
4346  return C_Other;
4347  case 'Q': // A memory reference with base register and no offset
4348  return C_Memory;
4349  case 'S': // A symbolic address
4350  return C_Other;
4351  }
4352  }
4353 
4354  // FIXME: Ump, Utf, Usa, Ush
4355  // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
4356  // whatever they may be
4357  // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
4358  // Usa: An absolute symbolic address
4359  // Ush: The high part (bits 32:12) of a pc-relative symbolic address
4360  assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
4361  && Constraint != "Ush" && "Unimplemented constraints");
4362 
4363  return TargetLowering::getConstraintType(Constraint);
4364 }
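// Example use of the 'w' register-class constraint (illustrative only; the
// inline-asm snippet is an assumption, not taken from this file):
//   float32x4_t r;
//   asm("fadd %0.4s, %1.4s, %2.4s" : "=w"(r) : "w"(a), "w"(b));
// 'w' requests an FP/SIMD register, so %0..%2 are rendered as v-registers.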
4365 
4366 TargetLowering::ConstraintWeight
4367 AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
4368                                     const char *Constraint) const {
4369 
4370  llvm_unreachable("Constraint weight unimplemented");
4371 }
4372 
4373 void
4374 AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
4375                                                     std::string &Constraint,
4376  std::vector<SDValue> &Ops,
4377  SelectionDAG &DAG) const {
4378  SDValue Result(0, 0);
4379 
4380  // Only length 1 constraints are C_Other.
4381  if (Constraint.size() != 1) return;
4382 
4383  // Only C_Other constraints get lowered like this, and for us those are all
4384  // constants, so return early if there is no hope the constraint can be lowered.
4385 
4386  switch(Constraint[0]) {
4387  default: break;
4388  case 'I': case 'J': case 'K': case 'L':
4389  case 'M': case 'N': case 'Z': {
4390  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4391  if (!C)
4392  return;
4393 
4394  uint64_t CVal = C->getZExtValue();
4395  uint32_t Bits;
4396 
4397  switch (Constraint[0]) {
4398  default:
4399  // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
4400  // is a peculiarly useless SUB constraint.
4401  llvm_unreachable("Unimplemented C_Other constraint");
4402  case 'I':
4403  if (CVal <= 0xfff)
4404  break;
4405  return;
4406  case 'K':
4407  if (A64Imms::isLogicalImm(32, CVal, Bits))
4408  break;
4409  return;
4410  case 'L':
4411  if (A64Imms::isLogicalImm(64, CVal, Bits))
4412  break;
4413  return;
4414  case 'Z':
4415  if (CVal == 0)
4416  break;
4417  return;
4418  }
4419 
4420  Result = DAG.getTargetConstant(CVal, Op.getValueType());
4421  break;
4422  }
4423  case 'S': {
4424  // An absolute symbolic address or label reference.
4425  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4426  Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
4427  GA->getValueType(0));
4428  } else if (const BlockAddressSDNode *BA
4429  = dyn_cast<BlockAddressSDNode>(Op)) {
4430  Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
4431  BA->getValueType(0));
4432  } else if (const ExternalSymbolSDNode *ES
4433  = dyn_cast<ExternalSymbolSDNode>(Op)) {
4434  Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
4435  ES->getValueType(0));
4436  } else
4437  return;
4438  break;
4439  }
4440  case 'Y':
4441  if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
4442  if (CFP->isExactlyValue(0.0)) {
4443  Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
4444  break;
4445  }
4446  }
4447  return;
4448  }
4449 
4450  if (Result.getNode()) {
4451  Ops.push_back(Result);
4452  return;
4453  }
4454 
4455  // It's an unknown constraint for us. Let generic code have a go.
4456  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4457 }
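// Example of the 'I' immediate constraint handled above (illustrative only):
//   asm("add %0, %1, %2" : "=r"(out) : "r"(in), "I"(4095));
// 4095 fits the 12-bit ADD immediate and is emitted as a target constant,
// whereas a value such as 4096 returns without a result and the constraint
// is not satisfied here.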
4458 
4459 std::pair<unsigned, const TargetRegisterClass*>
4460 AArch64TargetLowering::getRegForInlineAsmConstraint(
4461                                  const std::string &Constraint,
4462  MVT VT) const {
4463  if (Constraint.size() == 1) {
4464  switch (Constraint[0]) {
4465  case 'r':
4466  if (VT.getSizeInBits() <= 32)
4467  return std::make_pair(0U, &AArch64::GPR32RegClass);
4468  else if (VT == MVT::i64)
4469  return std::make_pair(0U, &AArch64::GPR64RegClass);
4470  break;
4471  case 'w':
4472  if (VT == MVT::f16)
4473  return std::make_pair(0U, &AArch64::FPR16RegClass);
4474  else if (VT == MVT::f32)
4475  return std::make_pair(0U, &AArch64::FPR32RegClass);
4476  else if (VT.getSizeInBits() == 64)
4477  return std::make_pair(0U, &AArch64::FPR64RegClass);
4478  else if (VT.getSizeInBits() == 128)
4479  return std::make_pair(0U, &AArch64::FPR128RegClass);
4480  break;
4481  }
4482  }
4483 
4484  // Use the default implementation in TargetLowering to convert the register
4485  // constraint into a member of a register class.
4486  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4487 }
4488 
4489 /// Represent NEON load and store intrinsics as MemIntrinsicNodes.
4490 /// The associated MachineMemOperands record the alignment specified
4491 /// in the intrinsic calls.
4492 bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4493                                                const CallInst &I,
4494  unsigned Intrinsic) const {
4495  switch (Intrinsic) {
4506  Info.opc = ISD::INTRINSIC_W_CHAIN;
4507  // Conservatively set memVT to the entire set of vectors loaded.
4508  uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
4509  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
4510  Info.ptrVal = I.getArgOperand(0);
4511  Info.offset = 0;
4512  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
4513  Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
4514  Info.vol = false; // volatile loads with NEON intrinsics not supported
4515  Info.readMem = true;
4516  Info.writeMem = false;
4517  return true;
4518  }
4529  Info.opc = ISD::INTRINSIC_VOID;
4530  // Conservatively set memVT to the entire set of vectors stored.
4531  unsigned NumElts = 0;
4532  for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
4533  Type *ArgTy = I.getArgOperand(ArgI)->getType();
4534  if (!ArgTy->isVectorTy())
4535  break;
4536  NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
4537  }
4538  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
4539  Info.ptrVal = I.getArgOperand(0);
4540  Info.offset = 0;
4541  Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
4542  Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
4543  Info.vol = false; // volatile stores with NEON intrinsics not supported
4544  Info.readMem = false;
4545  Info.writeMem = true;
4546  return true;
4547  }
4548  default:
4549  break;
4550  }
4551 
4552  return false;
4553 }
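// Illustration of the conservative memVT above (a sketch, not from the
// original source): for a vld3 returning three v4i32 values the allocated
// result size is 48 bytes, so NumElts = 48 / 8 = 6 and memVT is recorded as
// v6i64 (the whole 48-byte region), with the alignment taken from the
// intrinsic's trailing alignment argument.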