LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DAGCombiner.cpp
Go to the documentation of this file.
1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #define DEBUG_TYPE "dagcombine"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/Statistic.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/Debug.h"
40 #include <algorithm>
41 using namespace llvm;
42 
43 STATISTIC(NodesCombined , "Number of dag nodes combined");
44 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
45 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
46 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
47 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
48 STATISTIC(SlicedLoads, "Number of load sliced");
49 
50 namespace {
51  static cl::opt<bool>
52  CombinerAA("combiner-alias-analysis", cl::Hidden,
53  cl::desc("Turn on alias analysis during testing"));
54 
55  static cl::opt<bool>
56  CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
57  cl::desc("Include global information in alias analysis"));
58 
59  /// Hidden option to stress test load slicing, i.e., when this option
60  /// is enabled, load slicing bypasses most of its profitability guards.
61  static cl::opt<bool>
62  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
63  cl::desc("Bypass the profitability model of load "
64  "slicing"),
65  cl::init(false));
66 
67 //------------------------------ DAGCombiner ---------------------------------//
68 
69  class DAGCombiner {
70  SelectionDAG &DAG;
71  const TargetLowering &TLI;
73  CodeGenOpt::Level OptLevel;
74  bool LegalOperations;
75  bool LegalTypes;
76  bool ForCodeSize;
77 
78  // Worklist of all of the nodes that need to be simplified.
79  //
80  // This has the semantics that when adding to the worklist,
81  // the item added must be next to be processed. It should
82  // also only appear once. The naive approach to this takes
83  // linear time.
84  //
85  // To reduce the insert/remove time to logarithmic, we use
86  // a set and a vector to maintain our worklist.
87  //
88  // The set contains the items on the worklist, but does not
89  // maintain the order they should be visited.
90  //
91  // The vector maintains the order nodes should be visited, but may
92  // contain duplicate or removed nodes. When choosing a node to
93  // visit, we pop off the order stack until we find an item that is
94  // also in the contents set. All operations are O(log N).
95  SmallPtrSet<SDNode*, 64> WorkListContents;
96  SmallVector<SDNode*, 64> WorkListOrder;
97 
98  // AA - Used for DAG load/store alias analysis.
99  AliasAnalysis &AA;
100 
101  /// AddUsersToWorkList - When an instruction is simplified, add all users of
102  /// the instruction to the work lists because they might get more simplified
103  /// now.
104  ///
105  void AddUsersToWorkList(SDNode *N) {
106  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
107  UI != UE; ++UI)
108  AddToWorkList(*UI);
109  }
110 
111  /// visit - call the node-specific routine that knows how to fold each
112  /// particular type of node.
113  SDValue visit(SDNode *N);
114 
115  public:
116  /// AddToWorkList - Add to the work list making sure its instance is at the
117  /// back (next to be processed.)
118  void AddToWorkList(SDNode *N) {
119  WorkListContents.insert(N);
120  WorkListOrder.push_back(N);
121  }
122 
123  /// removeFromWorkList - remove all instances of N from the worklist.
124  ///
125  void removeFromWorkList(SDNode *N) {
126  WorkListContents.erase(N);
127  }
128 
129  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
130  bool AddTo = true);
131 
132  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
133  return CombineTo(N, &Res, 1, AddTo);
134  }
135 
136  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
137  bool AddTo = true) {
138  SDValue To[] = { Res0, Res1 };
139  return CombineTo(N, To, 2, AddTo);
140  }
141 
142  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
143 
144  private:
145 
146  /// SimplifyDemandedBits - Check the specified integer node value to see if
147  /// it can be simplified or if things it uses can be simplified by bit
148  /// propagation. If so, return true.
149  bool SimplifyDemandedBits(SDValue Op) {
150  unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
151  APInt Demanded = APInt::getAllOnesValue(BitWidth);
152  return SimplifyDemandedBits(Op, Demanded);
153  }
154 
155  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
156 
157  bool CombineToPreIndexedLoadStore(SDNode *N);
158  bool CombineToPostIndexedLoadStore(SDNode *N);
159  bool SliceUpLoad(SDNode *N);
160 
161  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
162  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
163  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
164  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
165  SDValue PromoteIntBinOp(SDValue Op);
166  SDValue PromoteIntShiftOp(SDValue Op);
167  SDValue PromoteExtend(SDValue Op);
168  bool PromoteLoad(SDValue Op);
169 
170  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
171  SDValue Trunc, SDValue ExtLoad, SDLoc DL,
172  ISD::NodeType ExtType);
173 
174  /// combine - call the node-specific routine that knows how to fold each
175  /// particular type of node. If that doesn't do anything, try the
176  /// target-specific DAG combines.
177  SDValue combine(SDNode *N);
178 
179  // Visitation implementation - Implement dag node combining for different
180  // node types. The semantics are as follows:
181  // Return Value:
182  // SDValue.getNode() == 0 - No change was made
183  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
184  // otherwise - N should be replaced by the returned Operand.
185  //
186  SDValue visitTokenFactor(SDNode *N);
187  SDValue visitMERGE_VALUES(SDNode *N);
188  SDValue visitADD(SDNode *N);
189  SDValue visitSUB(SDNode *N);
190  SDValue visitADDC(SDNode *N);
191  SDValue visitSUBC(SDNode *N);
192  SDValue visitADDE(SDNode *N);
193  SDValue visitSUBE(SDNode *N);
194  SDValue visitMUL(SDNode *N);
195  SDValue visitSDIV(SDNode *N);
196  SDValue visitUDIV(SDNode *N);
197  SDValue visitSREM(SDNode *N);
198  SDValue visitUREM(SDNode *N);
199  SDValue visitMULHU(SDNode *N);
200  SDValue visitMULHS(SDNode *N);
201  SDValue visitSMUL_LOHI(SDNode *N);
202  SDValue visitUMUL_LOHI(SDNode *N);
203  SDValue visitSMULO(SDNode *N);
204  SDValue visitUMULO(SDNode *N);
205  SDValue visitSDIVREM(SDNode *N);
206  SDValue visitUDIVREM(SDNode *N);
207  SDValue visitAND(SDNode *N);
208  SDValue visitOR(SDNode *N);
209  SDValue visitXOR(SDNode *N);
210  SDValue SimplifyVBinOp(SDNode *N);
211  SDValue SimplifyVUnaryOp(SDNode *N);
212  SDValue visitSHL(SDNode *N);
213  SDValue visitSRA(SDNode *N);
214  SDValue visitSRL(SDNode *N);
215  SDValue visitCTLZ(SDNode *N);
216  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
217  SDValue visitCTTZ(SDNode *N);
218  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
219  SDValue visitCTPOP(SDNode *N);
220  SDValue visitSELECT(SDNode *N);
221  SDValue visitVSELECT(SDNode *N);
222  SDValue visitSELECT_CC(SDNode *N);
223  SDValue visitSETCC(SDNode *N);
224  SDValue visitSIGN_EXTEND(SDNode *N);
225  SDValue visitZERO_EXTEND(SDNode *N);
226  SDValue visitANY_EXTEND(SDNode *N);
227  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
228  SDValue visitTRUNCATE(SDNode *N);
229  SDValue visitBITCAST(SDNode *N);
230  SDValue visitBUILD_PAIR(SDNode *N);
231  SDValue visitFADD(SDNode *N);
232  SDValue visitFSUB(SDNode *N);
233  SDValue visitFMUL(SDNode *N);
234  SDValue visitFMA(SDNode *N);
235  SDValue visitFDIV(SDNode *N);
236  SDValue visitFREM(SDNode *N);
237  SDValue visitFCOPYSIGN(SDNode *N);
238  SDValue visitSINT_TO_FP(SDNode *N);
239  SDValue visitUINT_TO_FP(SDNode *N);
240  SDValue visitFP_TO_SINT(SDNode *N);
241  SDValue visitFP_TO_UINT(SDNode *N);
242  SDValue visitFP_ROUND(SDNode *N);
243  SDValue visitFP_ROUND_INREG(SDNode *N);
244  SDValue visitFP_EXTEND(SDNode *N);
245  SDValue visitFNEG(SDNode *N);
246  SDValue visitFABS(SDNode *N);
247  SDValue visitFCEIL(SDNode *N);
248  SDValue visitFTRUNC(SDNode *N);
249  SDValue visitFFLOOR(SDNode *N);
250  SDValue visitBRCOND(SDNode *N);
251  SDValue visitBR_CC(SDNode *N);
252  SDValue visitLOAD(SDNode *N);
253  SDValue visitSTORE(SDNode *N);
254  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
255  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
256  SDValue visitBUILD_VECTOR(SDNode *N);
257  SDValue visitCONCAT_VECTORS(SDNode *N);
258  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
259  SDValue visitVECTOR_SHUFFLE(SDNode *N);
260 
261  SDValue XformToShuffleWithZero(SDNode *N);
262  SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
263 
264  SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
265 
266  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
267  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
268  SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
269  SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
270  SDValue N3, ISD::CondCode CC,
271  bool NotExtCompare = false);
272  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
273  SDLoc DL, bool foldBooleans = true);
274  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
275  unsigned HiOp);
276  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
277  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
278  SDValue BuildSDIV(SDNode *N);
279  SDValue BuildUDIV(SDNode *N);
280  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
281  bool DemandHighBits = true);
282  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
283  SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
284  SDValue ReduceLoadWidth(SDNode *N);
285  SDValue ReduceLoadOpStoreWidth(SDNode *N);
286  SDValue TransformFPLoadStorePair(SDNode *N);
287  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
288  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
289 
290  SDValue GetDemandedBits(SDValue V, const APInt &Mask);
291 
292  /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
293  /// looking for aliasing nodes and adding them to the Aliases vector.
294  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
295  SmallVectorImpl<SDValue> &Aliases);
296 
297  /// isAlias - Return true if there is any possibility that the two addresses
298  /// overlap.
299  bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
300  const Value *SrcValue1, int SrcValueOffset1,
301  unsigned SrcValueAlign1,
302  const MDNode *TBAAInfo1,
303  SDValue Ptr2, int64_t Size2, bool IsVolatile2,
304  const Value *SrcValue2, int SrcValueOffset2,
305  unsigned SrcValueAlign2,
306  const MDNode *TBAAInfo2) const;
307 
308  /// isAlias - Return true if there is any possibility that the two addresses
309  /// overlap.
310  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
311 
312  /// FindAliasInfo - Extracts the relevant alias information from the memory
313  /// node. Returns true if the operand was a load.
314  bool FindAliasInfo(SDNode *N,
315  SDValue &Ptr, int64_t &Size, bool &IsVolatile,
316  const Value *&SrcValue, int &SrcValueOffset,
317  unsigned &SrcValueAlignment,
318  const MDNode *&TBAAInfo) const;
319 
320  /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
321  /// looking for a better chain (aliasing node.)
322  SDValue FindBetterChain(SDNode *N, SDValue Chain);
323 
324  /// Merge consecutive store operations into a wide store.
325  /// This optimization uses wide integers or vectors when possible.
326  /// \return True if some memory operations were changed.
327  bool MergeConsecutiveStores(StoreSDNode *N);
328 
329  public:
330  DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
331  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
332  OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
333  AttributeSet FnAttrs =
334  DAG.getMachineFunction().getFunction()->getAttributes();
335  ForCodeSize =
336  FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
338  FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
339  }
340 
341  /// Run - runs the dag combiner on all nodes in the work list
342  void Run(CombineLevel AtLevel);
343 
344  SelectionDAG &getDAG() const { return DAG; }
345 
346  /// getShiftAmountTy - Returns a type large enough to hold any valid
347  /// shift amount - before type legalization these can be huge.
348  EVT getShiftAmountTy(EVT LHSTy) {
349  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
350  if (LHSTy.isVector())
351  return LHSTy;
352  return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
353  : TLI.getPointerTy();
354  }
355 
356  /// isTypeLegal - This method returns true if we are running before type
357  /// legalization or if the specified VT is legal.
358  bool isTypeLegal(const EVT &VT) {
359  if (!LegalTypes) return true;
360  return TLI.isTypeLegal(VT);
361  }
362 
363  /// getSetCCResultType - Convenience wrapper around
364  /// TargetLowering::getSetCCResultType
365  EVT getSetCCResultType(EVT VT) const {
366  return TLI.getSetCCResultType(*DAG.getContext(), VT);
367  }
368  };
369 }
370 
371 
372 namespace {
373 /// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
374 /// nodes from the worklist.
375 class WorkListRemover : public SelectionDAG::DAGUpdateListener {
376  DAGCombiner &DC;
377 public:
378  explicit WorkListRemover(DAGCombiner &dc)
379  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
380 
381  virtual void NodeDeleted(SDNode *N, SDNode *E) {
382  DC.removeFromWorkList(N);
383  }
384 };
385 }
386 
387 //===----------------------------------------------------------------------===//
388 // TargetLowering::DAGCombinerInfo implementation
389 //===----------------------------------------------------------------------===//
390 
392  ((DAGCombiner*)DC)->AddToWorkList(N);
393 }
394 
396  ((DAGCombiner*)DC)->removeFromWorkList(N);
397 }
398 
400 CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
401  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
402 }
403 
405 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
406  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
407 }
408 
409 
411 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
412  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
413 }
414 
417  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
418 }
419 
420 //===----------------------------------------------------------------------===//
421 // Helper Functions
422 //===----------------------------------------------------------------------===//
423 
424 /// isNegatibleForFree - Return 1 if we can compute the negated form of the
425 /// specified expression for the same cost as the expression itself, or 2 if we
426 /// can compute the negated form more cheaply than the expression itself.
427 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
428  const TargetLowering &TLI,
429  const TargetOptions *Options,
430  unsigned Depth = 0) {
431  // fneg is removable even if it has multiple uses.
432  if (Op.getOpcode() == ISD::FNEG) return 2;
433 
434  // Don't allow anything with multiple uses.
435  if (!Op.hasOneUse()) return 0;
436 
437  // Don't recurse exponentially.
438  if (Depth > 6) return 0;
439 
440  switch (Op.getOpcode()) {
441  default: return false;
442  case ISD::ConstantFP:
443  // Don't invert constant FP values after legalize. The negated constant
444  // isn't necessarily legal.
445  return LegalOperations ? 0 : 1;
446  case ISD::FADD:
447  // FIXME: determine better conditions for this xform.
448  if (!Options->UnsafeFPMath) return 0;
449 
450  // After operation legalization, it might not be legal to create new FSUBs.
451  if (LegalOperations &&
453  return 0;
454 
455  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
456  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
457  Options, Depth + 1))
458  return V;
459  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
460  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
461  Depth + 1);
462  case ISD::FSUB:
463  // We can't turn -(A-B) into B-A when we honor signed zeros.
464  if (!Options->UnsafeFPMath) return 0;
465 
466  // fold (fneg (fsub A, B)) -> (fsub B, A)
467  return 1;
468 
469  case ISD::FMUL:
470  case ISD::FDIV:
471  if (Options->HonorSignDependentRoundingFPMath()) return 0;
472 
473  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
474  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
475  Options, Depth + 1))
476  return V;
477 
478  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
479  Depth + 1);
480 
481  case ISD::FP_EXTEND:
482  case ISD::FP_ROUND:
483  case ISD::FSIN:
484  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
485  Depth + 1);
486  }
487 }
488 
489 /// GetNegatedExpression - If isNegatibleForFree returns true, this function
490 /// returns the newly negated expression.
492  bool LegalOperations, unsigned Depth = 0) {
493  // fneg is removable even if it has multiple uses.
494  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
495 
496  // Don't allow anything with multiple uses.
497  assert(Op.hasOneUse() && "Unknown reuse!");
498 
499  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
500  switch (Op.getOpcode()) {
501  default: llvm_unreachable("Unknown code");
502  case ISD::ConstantFP: {
503  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
504  V.changeSign();
505  return DAG.getConstantFP(V, Op.getValueType());
506  }
507  case ISD::FADD:
508  // FIXME: determine better conditions for this xform.
509  assert(DAG.getTarget().Options.UnsafeFPMath);
510 
511  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
512  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
513  DAG.getTargetLoweringInfo(),
514  &DAG.getTarget().Options, Depth+1))
515  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
516  GetNegatedExpression(Op.getOperand(0), DAG,
517  LegalOperations, Depth+1),
518  Op.getOperand(1));
519  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
520  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
521  GetNegatedExpression(Op.getOperand(1), DAG,
522  LegalOperations, Depth+1),
523  Op.getOperand(0));
524  case ISD::FSUB:
525  // We can't turn -(A-B) into B-A when we honor signed zeros.
526  assert(DAG.getTarget().Options.UnsafeFPMath);
527 
528  // fold (fneg (fsub 0, B)) -> B
529  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
530  if (N0CFP->getValueAPF().isZero())
531  return Op.getOperand(1);
532 
533  // fold (fneg (fsub A, B)) -> (fsub B, A)
534  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
535  Op.getOperand(1), Op.getOperand(0));
536 
537  case ISD::FMUL:
538  case ISD::FDIV:
540 
541  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
542  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
543  DAG.getTargetLoweringInfo(),
544  &DAG.getTarget().Options, Depth+1))
545  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
546  GetNegatedExpression(Op.getOperand(0), DAG,
547  LegalOperations, Depth+1),
548  Op.getOperand(1));
549 
550  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
551  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
552  Op.getOperand(0),
553  GetNegatedExpression(Op.getOperand(1), DAG,
554  LegalOperations, Depth+1));
555 
556  case ISD::FP_EXTEND:
557  case ISD::FSIN:
558  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
559  GetNegatedExpression(Op.getOperand(0), DAG,
560  LegalOperations, Depth+1));
561  case ISD::FP_ROUND:
562  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
563  GetNegatedExpression(Op.getOperand(0), DAG,
564  LegalOperations, Depth+1),
565  Op.getOperand(1));
566  }
567 }
568 
569 
570 // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
571 // that selects between the values 1 and 0, making it equivalent to a setcc.
572 // Also, set the incoming LHS, RHS, and CC references to the appropriate
573 // nodes based on the type of node we are checking. This simplifies life a
574 // bit for the callers.
575 static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
576  SDValue &CC) {
577  if (N.getOpcode() == ISD::SETCC) {
578  LHS = N.getOperand(0);
579  RHS = N.getOperand(1);
580  CC = N.getOperand(2);
581  return true;
582  }
583  if (N.getOpcode() == ISD::SELECT_CC &&
584  N.getOperand(2).getOpcode() == ISD::Constant &&
585  N.getOperand(3).getOpcode() == ISD::Constant &&
586  cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
587  cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
588  LHS = N.getOperand(0);
589  RHS = N.getOperand(1);
590  CC = N.getOperand(4);
591  return true;
592  }
593  return false;
594 }
595 
596 // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
597 // one use. If this is true, it allows the users to invert the operation for
598 // free when it is profitable to do so.
599 static bool isOneUseSetCC(SDValue N) {
600  SDValue N0, N1, N2;
601  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
602  return true;
603  return false;
604 }
605 
606 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
607  SDValue N0, SDValue N1) {
608  EVT VT = N0.getValueType();
609  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
610  if (isa<ConstantSDNode>(N1)) {
611  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
612  SDValue OpNode =
613  DAG.FoldConstantArithmetic(Opc, VT,
614  cast<ConstantSDNode>(N0.getOperand(1)),
615  cast<ConstantSDNode>(N1));
616  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
617  }
618  if (N0.hasOneUse()) {
619  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
620  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
621  N0.getOperand(0), N1);
622  AddToWorkList(OpNode.getNode());
623  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
624  }
625  }
626 
627  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
628  if (isa<ConstantSDNode>(N0)) {
629  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
630  SDValue OpNode =
631  DAG.FoldConstantArithmetic(Opc, VT,
632  cast<ConstantSDNode>(N1.getOperand(1)),
633  cast<ConstantSDNode>(N0));
634  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
635  }
636  if (N1.hasOneUse()) {
637  // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
638  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
639  N1.getOperand(0), N0);
640  AddToWorkList(OpNode.getNode());
641  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
642  }
643  }
644 
645  return SDValue();
646 }
647 
648 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
649  bool AddTo) {
650  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
651  ++NodesCombined;
652  DEBUG(dbgs() << "\nReplacing.1 ";
653  N->dump(&DAG);
654  dbgs() << "\nWith: ";
655  To[0].getNode()->dump(&DAG);
656  dbgs() << " and " << NumTo-1 << " other values\n";
657  for (unsigned i = 0, e = NumTo; i != e; ++i)
658  assert((!To[i].getNode() ||
659  N->getValueType(i) == To[i].getValueType()) &&
660  "Cannot combine value to value of different type!"));
661  WorkListRemover DeadNodes(*this);
662  DAG.ReplaceAllUsesWith(N, To);
663  if (AddTo) {
664  // Push the new nodes and any users onto the worklist
665  for (unsigned i = 0, e = NumTo; i != e; ++i) {
666  if (To[i].getNode()) {
667  AddToWorkList(To[i].getNode());
668  AddUsersToWorkList(To[i].getNode());
669  }
670  }
671  }
672 
673  // Finally, if the node is now dead, remove it from the graph. The node
674  // may not be dead if the replacement process recursively simplified to
675  // something else needing this node.
676  if (N->use_empty()) {
677  // Nodes can be reintroduced into the worklist. Make sure we do not
678  // process a node that has been replaced.
679  removeFromWorkList(N);
680 
681  // Finally, since the node is now dead, remove it from the graph.
682  DAG.DeleteNode(N);
683  }
684  return SDValue(N, 0);
685 }
686 
687 void DAGCombiner::
688 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
689  // Replace all uses. If any nodes become isomorphic to other nodes and
690  // are deleted, make sure to remove them from our worklist.
691  WorkListRemover DeadNodes(*this);
692  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
693 
694  // Push the new node and any (possibly new) users onto the worklist.
695  AddToWorkList(TLO.New.getNode());
696  AddUsersToWorkList(TLO.New.getNode());
697 
698  // Finally, if the node is now dead, remove it from the graph. The node
699  // may not be dead if the replacement process recursively simplified to
700  // something else needing this node.
701  if (TLO.Old.getNode()->use_empty()) {
702  removeFromWorkList(TLO.Old.getNode());
703 
704  // If the operands of this node are only used by the node, they will now
705  // be dead. Make sure to visit them first to delete dead nodes early.
706  for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
707  if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
708  AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
709 
710  DAG.DeleteNode(TLO.Old.getNode());
711  }
712 }
713 
714 /// SimplifyDemandedBits - Check the specified integer node value to see if
715 /// it can be simplified or if things it uses can be simplified by bit
716 /// propagation. If so, return true.
717 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
718  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
719  APInt KnownZero, KnownOne;
720  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
721  return false;
722 
723  // Revisit the node.
724  AddToWorkList(Op.getNode());
725 
726  // Replace the old value with the new one.
727  ++NodesCombined;
728  DEBUG(dbgs() << "\nReplacing.2 ";
729  TLO.Old.getNode()->dump(&DAG);
730  dbgs() << "\nWith: ";
731  TLO.New.getNode()->dump(&DAG);
732  dbgs() << '\n');
733 
734  CommitTargetLoweringOpt(TLO);
735  return true;
736 }
737 
738 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
739  SDLoc dl(Load);
740  EVT VT = Load->getValueType(0);
741  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
742 
743  DEBUG(dbgs() << "\nReplacing.9 ";
744  Load->dump(&DAG);
745  dbgs() << "\nWith: ";
746  Trunc.getNode()->dump(&DAG);
747  dbgs() << '\n');
748  WorkListRemover DeadNodes(*this);
749  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
750  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
751  removeFromWorkList(Load);
752  DAG.DeleteNode(Load);
753  AddToWorkList(Trunc.getNode());
754 }
755 
756 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
757  Replace = false;
758  SDLoc dl(Op);
759  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
760  EVT MemVT = LD->getMemoryVT();
762  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
763  : ISD::EXTLOAD)
764  : LD->getExtensionType();
765  Replace = true;
766  return DAG.getExtLoad(ExtType, dl, PVT,
767  LD->getChain(), LD->getBasePtr(),
768  MemVT, LD->getMemOperand());
769  }
770 
771  unsigned Opc = Op.getOpcode();
772  switch (Opc) {
773  default: break;
774  case ISD::AssertSext:
775  return DAG.getNode(ISD::AssertSext, dl, PVT,
776  SExtPromoteOperand(Op.getOperand(0), PVT),
777  Op.getOperand(1));
778  case ISD::AssertZext:
779  return DAG.getNode(ISD::AssertZext, dl, PVT,
780  ZExtPromoteOperand(Op.getOperand(0), PVT),
781  Op.getOperand(1));
782  case ISD::Constant: {
783  unsigned ExtOpc =
785  return DAG.getNode(ExtOpc, dl, PVT, Op);
786  }
787  }
788 
789  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
790  return SDValue();
791  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
792 }
793 
794 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
795  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
796  return SDValue();
797  EVT OldVT = Op.getValueType();
798  SDLoc dl(Op);
799  bool Replace = false;
800  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
801  if (NewOp.getNode() == 0)
802  return SDValue();
803  AddToWorkList(NewOp.getNode());
804 
805  if (Replace)
806  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
807  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
808  DAG.getValueType(OldVT));
809 }
810 
811 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
812  EVT OldVT = Op.getValueType();
813  SDLoc dl(Op);
814  bool Replace = false;
815  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
816  if (NewOp.getNode() == 0)
817  return SDValue();
818  AddToWorkList(NewOp.getNode());
819 
820  if (Replace)
821  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
822  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
823 }
824 
825 /// PromoteIntBinOp - Promote the specified integer binary operation if the
826 /// target indicates it is beneficial. e.g. On x86, it's usually better to
827 /// promote i16 operations to i32 since i16 instructions are longer.
828 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
829  if (!LegalOperations)
830  return SDValue();
831 
832  EVT VT = Op.getValueType();
833  if (VT.isVector() || !VT.isInteger())
834  return SDValue();
835 
836  // If operation type is 'undesirable', e.g. i16 on x86, consider
837  // promoting it.
838  unsigned Opc = Op.getOpcode();
839  if (TLI.isTypeDesirableForOp(Opc, VT))
840  return SDValue();
841 
842  EVT PVT = VT;
843  // Consult target whether it is a good idea to promote this operation and
844  // what's the right type to promote it to.
845  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
846  assert(PVT != VT && "Don't know what type to promote to!");
847 
848  bool Replace0 = false;
849  SDValue N0 = Op.getOperand(0);
850  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
851  if (NN0.getNode() == 0)
852  return SDValue();
853 
854  bool Replace1 = false;
855  SDValue N1 = Op.getOperand(1);
856  SDValue NN1;
857  if (N0 == N1)
858  NN1 = NN0;
859  else {
860  NN1 = PromoteOperand(N1, PVT, Replace1);
861  if (NN1.getNode() == 0)
862  return SDValue();
863  }
864 
865  AddToWorkList(NN0.getNode());
866  if (NN1.getNode())
867  AddToWorkList(NN1.getNode());
868 
869  if (Replace0)
870  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
871  if (Replace1)
872  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
873 
874  DEBUG(dbgs() << "\nPromoting ";
875  Op.getNode()->dump(&DAG));
876  SDLoc dl(Op);
877  return DAG.getNode(ISD::TRUNCATE, dl, VT,
878  DAG.getNode(Opc, dl, PVT, NN0, NN1));
879  }
880  return SDValue();
881 }
882 
883 /// PromoteIntShiftOp - Promote the specified integer shift operation if the
884 /// target indicates it is beneficial. e.g. On x86, it's usually better to
885 /// promote i16 operations to i32 since i16 instructions are longer.
886 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
887  if (!LegalOperations)
888  return SDValue();
889 
890  EVT VT = Op.getValueType();
891  if (VT.isVector() || !VT.isInteger())
892  return SDValue();
893 
894  // If operation type is 'undesirable', e.g. i16 on x86, consider
895  // promoting it.
896  unsigned Opc = Op.getOpcode();
897  if (TLI.isTypeDesirableForOp(Opc, VT))
898  return SDValue();
899 
900  EVT PVT = VT;
901  // Consult target whether it is a good idea to promote this operation and
902  // what's the right type to promote it to.
903  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
904  assert(PVT != VT && "Don't know what type to promote to!");
905 
906  bool Replace = false;
907  SDValue N0 = Op.getOperand(0);
908  if (Opc == ISD::SRA)
909  N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
910  else if (Opc == ISD::SRL)
911  N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
912  else
913  N0 = PromoteOperand(N0, PVT, Replace);
914  if (N0.getNode() == 0)
915  return SDValue();
916 
917  AddToWorkList(N0.getNode());
918  if (Replace)
919  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
920 
921  DEBUG(dbgs() << "\nPromoting ";
922  Op.getNode()->dump(&DAG));
923  SDLoc dl(Op);
924  return DAG.getNode(ISD::TRUNCATE, dl, VT,
925  DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
926  }
927  return SDValue();
928 }
929 
930 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
931  if (!LegalOperations)
932  return SDValue();
933 
934  EVT VT = Op.getValueType();
935  if (VT.isVector() || !VT.isInteger())
936  return SDValue();
937 
938  // If operation type is 'undesirable', e.g. i16 on x86, consider
939  // promoting it.
940  unsigned Opc = Op.getOpcode();
941  if (TLI.isTypeDesirableForOp(Opc, VT))
942  return SDValue();
943 
944  EVT PVT = VT;
945  // Consult target whether it is a good idea to promote this operation and
946  // what's the right type to promote it to.
947  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
948  assert(PVT != VT && "Don't know what type to promote to!");
949  // fold (aext (aext x)) -> (aext x)
950  // fold (aext (zext x)) -> (zext x)
951  // fold (aext (sext x)) -> (sext x)
952  DEBUG(dbgs() << "\nPromoting ";
953  Op.getNode()->dump(&DAG));
954  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
955  }
956  return SDValue();
957 }
958 
/// PromoteLoad - Try to replace the scalar integer load Op with an extending
/// load of the type the target prefers, followed by a truncate back to the
/// original type. All uses of the old load's value 0 are redirected to the
/// truncate and all uses of its value 1 to value 1 of the new load; the old
/// node is then deleted. Returns true if the load was replaced.
959 bool DAGCombiner::PromoteLoad(SDValue Op) {
960  if (!LegalOperations)
961  return false;
962 
963  EVT VT = Op.getValueType();
964  if (VT.isVector() || !VT.isInteger())
965  return false;
966 
967  // If operation type is 'undesirable', e.g. i16 on x86, consider
968  // promoting it.
969  unsigned Opc = Op.getOpcode();
970  if (TLI.isTypeDesirableForOp(Opc, VT))
971  return false;
972 
973  EVT PVT = VT;
974  // Consult target whether it is a good idea to promote this operation and
975  // what's the right type to promote it to.
976  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
977  assert(PVT != VT && "Don't know what type to promote to!");
978 
979  SDLoc dl(Op);
980  SDNode *N = Op.getNode();
981  LoadSDNode *LD = cast<LoadSDNode>(N);
982  EVT MemVT = LD->getMemoryVT();
  // NOTE(review): listing line 983 is absent from this extract. It presumably
  // declares ExtType and supplies the condition of the conditional expression
  // that continues below -- restore it from the upstream source.
984  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
985  : ISD::EXTLOAD)
986  : LD->getExtensionType();
  // The new load keeps the old chain, base pointer, memory type and memory
  // operand; only the result type (PVT) and the extension kind change.
987  SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
988  LD->getChain(), LD->getBasePtr(),
989  MemVT, LD->getMemOperand());
990  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
991 
992  DEBUG(dbgs() << "\nPromoting ";
993  N->dump(&DAG);
994  dbgs() << "\nTo: ";
995  Result.getNode()->dump(&DAG);
996  dbgs() << '\n');
997  WorkListRemover DeadNodes(*this);
  // Value 0 of the old load is replaced by the truncate, value 1 by the
  // corresponding value of the new load.
998  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
999  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1000  removeFromWorkList(N);
1001  DAG.DeleteNode(N);
1002  AddToWorkList(Result.getNode());
1003  return true;
1004  }
1005  return false;
1006 }
1007 
1008 
1009 //===----------------------------------------------------------------------===//
1010 // Main DAG Combiner implementation
1011 //===----------------------------------------------------------------------===//
1012 
/// Run - Drive the combiner at the given combine level: seed the worklist with
/// every node in the DAG, then repeatedly pop a node, delete it if it is dead,
/// or otherwise try to combine it and replace its uses with the result, until
/// the worklist is empty. The DAG root is restored and dead nodes are removed
/// at the end.
1013 void DAGCombiner::Run(CombineLevel AtLevel) {
1014  // set the instance variables, so that the various visit routines may use it.
1015  Level = AtLevel;
1016  LegalOperations = Level >= AfterLegalizeVectorOps;
1017  LegalTypes = Level >= AfterLegalizeTypes;
1018 
1019  // Add all the dag nodes to the worklist.
1020  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
1021  E = DAG.allnodes_end(); I != E; ++I)
1022  AddToWorkList(I);
1023 
1024  // Create a dummy node (which is not added to allnodes), that adds a reference
1025  // to the root node, preventing it from being deleted, and tracking any
1026  // changes of the root.
1027  HandleSDNode Dummy(DAG.getRoot());
1028 
1029  // The root of the dag may dangle to deleted nodes until the dag combiner is
1030  // done. Set it to null to avoid confusion.
1031  DAG.setRoot(SDValue());
1032 
1033  // While the worklist isn't empty, find a node and
1034  // try to combine it.
1035  while (!WorkListContents.empty()) {
1036  SDNode *N;
1037  // The WorkListOrder holds the SDNodes in order, but it may contain
1038  // duplicates.
1039  // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
1040  // worklist *should* contain, and check that the node we want to visit
1041  // should actually be visited.
1042  do {
1043  N = WorkListOrder.pop_back_val();
1044  } while (!WorkListContents.erase(N));
1045 
1046  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1047  // N is deleted from the DAG, since they too may now be dead or may have a
1048  // reduced number of uses, allowing other xforms.
1049  if (N->use_empty() && N != &Dummy) {
1050  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1051  AddToWorkList(N->getOperand(i).getNode());
1052 
1053  DAG.DeleteNode(N);
1054  continue;
1055  }
1056 
1057  SDValue RV = combine(N);
1058 
  // A null result means no combine applied to N.
1059  if (RV.getNode() == 0)
1060  continue;
1061 
1062  ++NodesCombined;
1063 
1064  // If we get back the same node we passed in, rather than a new node or
1065  // zero, we know that the node must have defined multiple values and
1066  // CombineTo was used. Since CombineTo takes care of the worklist
1067  // mechanics for us, we have no work to do in this case.
1068  if (RV.getNode() == N)
1069  continue;
1070 
1071  assert(N->getOpcode() != ISD::DELETED_NODE &&
1072  RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
1073  "Node was deleted but visit returned new node!");
1074 
1075  DEBUG(dbgs() << "\nReplacing.3 ";
1076  N->dump(&DAG);
1077  dbgs() << "\nWith: ";
1078  RV.getNode()->dump(&DAG);
1079  dbgs() << '\n');
1080 
1081  // Transfer debug value.
1082  DAG.TransferDbgValues(SDValue(N, 0), RV);
1083  WorkListRemover DeadNodes(*this);
  // With matching value counts the whole node is replaced; otherwise N must
  // define a single value, which is replaced by RV.
1084  if (N->getNumValues() == RV.getNode()->getNumValues())
1085  DAG.ReplaceAllUsesWith(N, RV.getNode());
1086  else {
1087  assert(N->getValueType(0) == RV.getValueType() &&
1088  N->getNumValues() == 1 && "Type mismatch");
1089  SDValue OpV = RV;
1090  DAG.ReplaceAllUsesWith(N, &OpV);
1091  }
1092 
1093  // Push the new node and any users onto the worklist
1094  AddToWorkList(RV.getNode());
1095  AddUsersToWorkList(RV.getNode());
1096 
1097  // Add the operands of the old node to the worklist in case this node is
1098  // the last one that uses them. They may become dead after this node is
1099  // deleted.
1100  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1101  AddToWorkList(N->getOperand(i).getNode());
1102 
1103  // Finally, if the node is now dead, remove it from the graph. The node
1104  // may not be dead if the replacement process recursively simplified to
1105  // something else needing this node.
1106  if (N->use_empty()) {
1107  // Nodes can be reintroduced into the worklist. Make sure we do not
1108  // process a node that has been replaced.
1109  removeFromWorkList(N);
1110 
1111  // Finally, since the node is now dead, remove it from the graph.
1112  DAG.DeleteNode(N);
1113  }
1114  }
1115 
1116  // If the root changed (e.g. it was a dead load), update the root.
1117  DAG.setRoot(Dummy.getValue());
1118  DAG.RemoveDeadNodes();
1119 }
1120 
1121 SDValue DAGCombiner::visit(SDNode *N) {
1122  switch (N->getOpcode()) {
1123  default: break;
1124  case ISD::TokenFactor: return visitTokenFactor(N);
1125  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1126  case ISD::ADD: return visitADD(N);
1127  case ISD::SUB: return visitSUB(N);
1128  case ISD::ADDC: return visitADDC(N);
1129  case ISD::SUBC: return visitSUBC(N);
1130  case ISD::ADDE: return visitADDE(N);
1131  case ISD::SUBE: return visitSUBE(N);
1132  case ISD::MUL: return visitMUL(N);
1133  case ISD::SDIV: return visitSDIV(N);
1134  case ISD::UDIV: return visitUDIV(N);
1135  case ISD::SREM: return visitSREM(N);
1136  case ISD::UREM: return visitUREM(N);
1137  case ISD::MULHU: return visitMULHU(N);
1138  case ISD::MULHS: return visitMULHS(N);
1139  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1140  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1141  case ISD::SMULO: return visitSMULO(N);
1142  case ISD::UMULO: return visitUMULO(N);
1143  case ISD::SDIVREM: return visitSDIVREM(N);
1144  case ISD::UDIVREM: return visitUDIVREM(N);
1145  case ISD::AND: return visitAND(N);
1146  case ISD::OR: return visitOR(N);
1147  case ISD::XOR: return visitXOR(N);
1148  case ISD::SHL: return visitSHL(N);
1149  case ISD::SRA: return visitSRA(N);
1150  case ISD::SRL: return visitSRL(N);
1151  case ISD::CTLZ: return visitCTLZ(N);
1152  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1153  case ISD::CTTZ: return visitCTTZ(N);
1154  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1155  case ISD::CTPOP: return visitCTPOP(N);
1156  case ISD::SELECT: return visitSELECT(N);
1157  case ISD::VSELECT: return visitVSELECT(N);
1158  case ISD::SELECT_CC: return visitSELECT_CC(N);
1159  case ISD::SETCC: return visitSETCC(N);
1160  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1161  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1162  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1163  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1164  case ISD::TRUNCATE: return visitTRUNCATE(N);
1165  case ISD::BITCAST: return visitBITCAST(N);
1166  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1167  case ISD::FADD: return visitFADD(N);
1168  case ISD::FSUB: return visitFSUB(N);
1169  case ISD::FMUL: return visitFMUL(N);
1170  case ISD::FMA: return visitFMA(N);
1171  case ISD::FDIV: return visitFDIV(N);
1172  case ISD::FREM: return visitFREM(N);
1173  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1174  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1175  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1176  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1177  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1178  case ISD::FP_ROUND: return visitFP_ROUND(N);
1179  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1180  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1181  case ISD::FNEG: return visitFNEG(N);
1182  case ISD::FABS: return visitFABS(N);
1183  case ISD::FFLOOR: return visitFFLOOR(N);
1184  case ISD::FCEIL: return visitFCEIL(N);
1185  case ISD::FTRUNC: return visitFTRUNC(N);
1186  case ISD::BRCOND: return visitBRCOND(N);
1187  case ISD::BR_CC: return visitBR_CC(N);
1188  case ISD::LOAD: return visitLOAD(N);
1189  case ISD::STORE: return visitSTORE(N);
1190  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1191  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1192  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1193  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1194  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1195  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1196  }
1197  return SDValue();
1198 }
1199 
/// combine - Try to simplify N in up to four stages, stopping at the first one
/// that produces a result: the generic visit routine, the target-specific
/// combine hook, type promotion of the operation, and finally reuse of an
/// existing node with the two operands swapped. Returns a null SDValue when no
/// stage applies.
1200 SDValue DAGCombiner::combine(SDNode *N) {
1201  SDValue RV = visit(N);
1202 
1203  // If nothing happened, try a target-specific DAG combine.
1204  if (RV.getNode() == 0) {
1205  assert(N->getOpcode() != ISD::DELETED_NODE &&
1206  "Node was deleted but visit returned NULL!");
1207 
  // Target-defined opcodes (>= BUILTIN_OP_END) always go to the target hook;
  // generic opcodes only when the target registered a combine for them.
1208  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1209  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1210 
1211  // Expose the DAG combiner to the target combiner impls.
  // NOTE(review): listing line 1212 is absent from this extract; it
  // presumably names the type of DagCombineInfo -- restore it from the
  // upstream source.
1213  DagCombineInfo(DAG, Level, false, this);
1214 
1215  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1216  }
1217  }
1218 
1219  // If nothing happened still, try promoting the operation.
1220  if (RV.getNode() == 0) {
1221  switch (N->getOpcode()) {
1222  default: break;
1223  case ISD::ADD:
1224  case ISD::SUB:
1225  case ISD::MUL:
1226  case ISD::AND:
1227  case ISD::OR:
1228  case ISD::XOR:
1229  RV = PromoteIntBinOp(SDValue(N, 0));
1230  break;
1231  case ISD::SHL:
1232  case ISD::SRA:
1233  case ISD::SRL:
1234  RV = PromoteIntShiftOp(SDValue(N, 0));
1235  break;
1236  case ISD::SIGN_EXTEND:
1237  case ISD::ZERO_EXTEND:
1238  case ISD::ANY_EXTEND:
1239  RV = PromoteExtend(SDValue(N, 0));
1240  break;
1241  case ISD::LOAD:
  // PromoteLoad performs the replacement itself, so N is returned to signal
  // that a change was made.
1242  if (PromoteLoad(SDValue(N, 0)))
1243  RV = SDValue(N, 0);
1244  break;
1245  }
1246  }
1247 
1248  // If N is a commutative binary node, try commuting it to enable more
1249  // sdisel CSE.
  // NOTE(review): listing line 1251 is absent from this extract; going by the
  // comment above it presumably holds the commutativity test on N's opcode --
  // restore it from the upstream source.
1250  if (RV.getNode() == 0 &&
1252  N->getNumValues() == 1) {
1253  SDValue N0 = N->getOperand(0);
1254  SDValue N1 = N->getOperand(1);
1255 
1256  // Constant operands are canonicalized to RHS.
1257  if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1258  SDValue Ops[] = { N1, N0 };
1259  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
1260  Ops, 2);
1261  if (CSENode)
1262  return SDValue(CSENode, 0);
1263  }
1264  }
1265 
1266  return RV;
1267 }
1268 
1269 /// getInputChainForNode - Given a node, return its input chain if it has one,
1270 /// otherwise return a null sd operand. An operand counts as a chain when its
/// value type is MVT::Other. The first and last operands are checked before
/// the ones in between.
// NOTE(review): listing line 1271 (the function signature) is absent from this
// extract. The body uses a parameter named N and returns SDValue -- restore
// the signature from the upstream source.
1272  if (unsigned NumOps = N->getNumOperands()) {
1273  if (N->getOperand(0).getValueType() == MVT::Other)
1274  return N->getOperand(0);
1275  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1276  return N->getOperand(NumOps-1);
1277  for (unsigned i = 1; i < NumOps-1; ++i)
1278  if (N->getOperand(i).getValueType() == MVT::Other)
1279  return N->getOperand(i);
1280  }
1281  return SDValue();
1282 }
1283 
1284 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1285  // If N has two operands, where one has an input chain equal to the other,
1286  // the 'other' chain is redundant.
1287  if (N->getNumOperands() == 2) {
1288  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1289  return N->getOperand(0);
1290  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1291  return N->getOperand(1);
1292  }
1293 
1294  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1295  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1296  SmallPtrSet<SDNode*, 16> SeenOps;
1297  bool Changed = false; // If we should replace this token factor.
1298 
1299  // Start out with this token factor.
1300  TFs.push_back(N);
1301 
1302  // Iterate through token factors. The TFs grows when new token factors are
1303  // encountered.
1304  for (unsigned i = 0; i < TFs.size(); ++i) {
1305  SDNode *TF = TFs[i];
1306 
1307  // Check each of the operands.
1308  for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
1309  SDValue Op = TF->getOperand(i);
1310 
1311  switch (Op.getOpcode()) {
1312  case ISD::EntryToken:
1313  // Entry tokens don't need to be added to the list. They are
1314  // rededundant.
1315  Changed = true;
1316  break;
1317 
1318  case ISD::TokenFactor:
1319  if (Op.hasOneUse() &&
1320  std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
1321  // Queue up for processing.
1322  TFs.push_back(Op.getNode());
1323  // Clean up in case the token factor is removed.
1324  AddToWorkList(Op.getNode());
1325  Changed = true;
1326  break;
1327  }
1328  // Fall thru
1329 
1330  default:
1331  // Only add if it isn't already in the list.
1332  if (SeenOps.insert(Op.getNode()))
1333  Ops.push_back(Op);
1334  else
1335  Changed = true;
1336  break;
1337  }
1338  }
1339  }
1340 
1341  SDValue Result;
1342 
1343  // If we've change things around then replace token factor.
1344  if (Changed) {
1345  if (Ops.empty()) {
1346  // The entry token is the only possible outcome.
1347  Result = DAG.getEntryNode();
1348  } else {
1349  // New and improved token factor.
1350  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
1351  MVT::Other, &Ops[0], Ops.size());
1352  }
1353 
1354  // Don't add users to work list.
1355  return CombineTo(N, Result, false);
1356  }
1357 
1358  return Result;
1359 }
1360 
1361 /// MERGE_VALUES can always be eliminated.
1362 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1363  WorkListRemover DeadNodes(*this);
1364  // Replacing results may cause a different MERGE_VALUES to suddenly
1365  // be CSE'd with N, and carry its uses with it. Iterate until no
1366  // uses remain, to ensure that the node can be safely deleted.
1367  // First add the users of this node to the work list so that they
1368  // can be tried again once they have new operands.
1369  AddUsersToWorkList(N);
1370  do {
1371  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1372  DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1373  } while (!N->use_empty());
1374  removeFromWorkList(N);
1375  DAG.DeleteNode(N);
1376  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1377 }
1378 
// Helper for the ADD combine: given a shift-left node N0 and the other addend
// N1, fold (add (shl (add x, c1), c2), N1) into
// (add (add (shl x, c2), c1<<c2), N1) when the shift amount is a constant and
// the inner add has a single use and a constant right operand. Returns a null
// SDValue otherwise.
// NOTE(review): listing line 1380 (return type, name and leading parameters)
// is absent from this extract. The call sites in this file pass
// (SDLoc, SDValue, SDValue, SelectionDAG&) to combineShlAddConstant -- restore
// the signature from the upstream source.
1379 static
1381  SelectionDAG &DAG) {
1382  EVT VT = N0.getValueType();
1383  SDValue N00 = N0.getOperand(0);
1384  SDValue N01 = N0.getOperand(1);
1385  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
1386 
1387  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
1388  isa<ConstantSDNode>(N00.getOperand(1))) {
1389  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
1390  N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
1391  DAG.getNode(ISD::SHL, SDLoc(N00), VT,
1392  N00.getOperand(0), N01),
1393  DAG.getNode(ISD::SHL, SDLoc(N01), VT,
1394  N00.getOperand(1), N01));
1395  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
1396  }
1397 
1398  return SDValue();
1399 }
1400 
/// visitADD - Try the ADD simplifications below in order and return the first
/// replacement value found, or a null SDValue if none applies. SDValue(N, 0)
/// is returned when SimplifyDemandedBits reports a change.
1401 SDValue DAGCombiner::visitADD(SDNode *N) {
1402  SDValue N0 = N->getOperand(0);
1403  SDValue N1 = N->getOperand(1);
  // NOTE(review): listing lines 1404-1405 are absent from this extract. N0C
  // and N1C are used below without a visible declaration; presumably they are
  // the ConstantSDNode views of N0 and N1 -- restore from the upstream source.
1406  EVT VT = N0.getValueType();
1407 
1408  // fold vector ops
1409  if (VT.isVector()) {
1410  SDValue FoldedVOp = SimplifyVBinOp(N);
1411  if (FoldedVOp.getNode()) return FoldedVOp;
1412 
1413  // fold (add x, 0) -> x, vector edition
  // NOTE(review): listing lines 1414 and 1416 (the conditions guarding the
  // two returns below) are absent from this extract -- restore from the
  // upstream source.
1415  return N0;
1417  return N1;
1418  }
1419 
1420  // fold (add x, undef) -> undef
1421  if (N0.getOpcode() == ISD::UNDEF)
1422  return N0;
1423  if (N1.getOpcode() == ISD::UNDEF)
1424  return N1;
1425  // fold (add c1, c2) -> c1+c2
1426  if (N0C && N1C)
1427  return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
1428  // canonicalize constant to RHS
1429  if (N0C && !N1C)
1430  return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
1431  // fold (add x, 0) -> x
1432  if (N1C && N1C->isNullValue())
1433  return N0;
1434  // fold (add Sym, c) -> Sym+c
1435  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
1436  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
1437  GA->getOpcode() == ISD::GlobalAddress)
1438  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
1439  GA->getOffset() +
1440  (uint64_t)N1C->getSExtValue());
1441  // fold ((c1-A)+c2) -> (c1+c2)-A
1442  if (N1C && N0.getOpcode() == ISD::SUB)
1443  if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
1444  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1445  DAG.getConstant(N1C->getAPIntValue()+
1446  N0C->getAPIntValue(), VT),
1447  N0.getOperand(1));
1448  // reassociate add
1449  SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
1450  if (RADD.getNode() != 0)
1451  return RADD;
1452  // fold ((0-A) + B) -> B-A
1453  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
1454  cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
1455  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
1456  // fold (A + (0-B)) -> A-B
1457  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
1458  cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
1459  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
1460  // fold (A+(B-A)) -> B
1461  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1462  return N1.getOperand(0);
1463  // fold ((B-A)+A) -> B
1464  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1465  return N0.getOperand(0);
1466  // fold (A+(B-(A+C))) to (B-C)
1467  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1468  N0 == N1.getOperand(1).getOperand(0))
1469  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
1470  N1.getOperand(1).getOperand(1));
1471  // fold (A+(B-(C+A))) to (B-C)
1472  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1473  N0 == N1.getOperand(1).getOperand(1))
1474  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
1475  N1.getOperand(1).getOperand(0));
1476  // fold (A+((B-A)+or-C)) to (B+or-C)
1477  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1478  N1.getOperand(0).getOpcode() == ISD::SUB &&
1479  N0 == N1.getOperand(0).getOperand(1))
1480  return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
1481  N1.getOperand(0).getOperand(0), N1.getOperand(1));
1482 
1483  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1484  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1485  SDValue N00 = N0.getOperand(0);
1486  SDValue N01 = N0.getOperand(1);
1487  SDValue N10 = N1.getOperand(0);
1488  SDValue N11 = N1.getOperand(1);
1489 
1490  if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
1491  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1492  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1493  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1494  }
1495 
  // Returning N itself reports that the node was changed in place.
1496  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
1497  return SDValue(N, 0);
1498 
1499  // fold (a+b) -> (a|b) iff a and b share no bits.
1500  if (VT.isInteger() && !VT.isVector()) {
1501  APInt LHSZero, LHSOne;
1502  APInt RHSZero, RHSOne;
1503  DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
1504 
  // Only analyze the RHS when at least one LHS bit is known to be zero.
1505  if (LHSZero.getBoolValue()) {
1506  DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
1507 
1508  // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1509  // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1510  if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
1511  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
1512  }
1513  }
1514 
1515  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
1516  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
1517  SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
1518  if (Result.getNode()) return Result;
1519  }
1520  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
1521  SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
1522  if (Result.getNode()) return Result;
1523  }
1524 
1525  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
1526  if (N1.getOpcode() == ISD::SHL &&
1527  N1.getOperand(0).getOpcode() == ISD::SUB)
1528  if (ConstantSDNode *C =
1529  dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
1530  if (C->getAPIntValue() == 0)
1531  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
1532  DAG.getNode(ISD::SHL, SDLoc(N), VT,
1533  N1.getOperand(0).getOperand(1),
1534  N1.getOperand(1)));
  // Same fold with the operands swapped: (add shl(0 - y, n), x).
1535  if (N0.getOpcode() == ISD::SHL &&
1536  N0.getOperand(0).getOpcode() == ISD::SUB)
1537  if (ConstantSDNode *C =
1538  dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
1539  if (C->getAPIntValue() == 0)
1540  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
1541  DAG.getNode(ISD::SHL, SDLoc(N), VT,
1542  N0.getOperand(0).getOperand(1),
1543  N0.getOperand(1)));
1544 
1545  if (N1.getOpcode() == ISD::AND) {
1546  SDValue AndOp0 = N1.getOperand(0);
1547  ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
1548  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
1549  unsigned DestBits = VT.getScalarType().getSizeInBits();
1550 
1551  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
1552  // and similar xforms where the inner op is either ~0 or 0.
1553  if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
1554  SDLoc DL(N);
1555  return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
1556  }
1557  }
1558 
1559  // add (sext i1), X -> sub X, (zext i1)
1560  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1561  N0.getOperand(0).getValueType() == MVT::i1 &&
1562  !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
1563  SDLoc DL(N);
1564  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
1565  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
1566  }
1567 
1568  return SDValue();
1569 }
1570 
/// visitADDC - Simplify an ADDC node (value 0 is the sum, value 1 the carry
/// flag). Folds that prove there is no carry replace the flag result with a
/// CARRY_FALSE node of type MVT::Glue. Returns a null SDValue when no fold
/// applies.
1571 SDValue DAGCombiner::visitADDC(SDNode *N) {
1572  SDValue N0 = N->getOperand(0);
1573  SDValue N1 = N->getOperand(1);
  // NOTE(review): listing lines 1574-1575 are absent from this extract. N0C
  // and N1C are used below without a visible declaration; presumably they are
  // the ConstantSDNode views of N0 and N1 -- restore from the upstream source.
1576  EVT VT = N0.getValueType();
1577 
1578  // If the flag result is dead, turn this into an ADD.
1579  if (!N->hasAnyUseOfValue(1))
1580  return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
1581  DAG.getNode(ISD::CARRY_FALSE,
1582  SDLoc(N), MVT::Glue));
1583 
1584  // canonicalize constant to RHS.
1585  if (N0C && !N1C)
1586  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
1587 
1588  // fold (addc x, 0) -> x + no carry out
1589  if (N1C && N1C->isNullValue())
1590  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
1591  SDLoc(N), MVT::Glue));
1592 
1593  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
1594  APInt LHSZero, LHSOne;
1595  APInt RHSZero, RHSOne;
1596  DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
1597 
  // Only analyze the RHS when at least one LHS bit is known to be zero.
1598  if (LHSZero.getBoolValue()) {
1599  DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
1600 
1601  // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1602  // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1603  if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
1604  return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
1605  DAG.getNode(ISD::CARRY_FALSE,
1606  SDLoc(N), MVT::Glue));
1607  }
1608 
1609  return SDValue();
1610 }
1611 
/// visitADDE - Simplify an ADDE node, whose third operand is the incoming
/// carry. A constant left operand is moved to the right, and an incoming carry
/// of CARRY_FALSE turns the node into an ADDC. Returns a null SDValue when
/// neither applies.
1612 SDValue DAGCombiner::visitADDE(SDNode *N) {
1613  SDValue N0 = N->getOperand(0);
1614  SDValue N1 = N->getOperand(1);
1615  SDValue CarryIn = N->getOperand(2);
  // NOTE(review): listing lines 1616-1617 are absent from this extract. N0C
  // and N1C are used below without a visible declaration; presumably they are
  // the ConstantSDNode views of N0 and N1 -- restore from the upstream source.
1618 
1619  // canonicalize constant to RHS
1620  if (N0C && !N1C)
1621  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
1622  N1, N0, CarryIn);
1623 
1624  // fold (adde x, y, false) -> (addc x, y)
1625  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1626  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
1627 
1628  return SDValue();
1629 }
1630 
1631 // Since it may not be valid to emit a fold to zero for vector initializers
1632 // check if we can before folding.
1633 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
1634  SelectionDAG &DAG,
1635  bool LegalOperations, bool LegalTypes) {
1636  if (!VT.isVector())
1637  return DAG.getConstant(0, VT);
1638  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
1639  return DAG.getConstant(0, VT);
1640  return SDValue();
1641 }
1642 
/// visitSUB - Try the SUB simplifications below in order and return the first
/// replacement value found, or a null SDValue if none applies.
1643 SDValue DAGCombiner::visitSUB(SDNode *N) {
1644  SDValue N0 = N->getOperand(0);
1645  SDValue N1 = N->getOperand(1);
  // NOTE(review): listing lines 1646-1647 are absent from this extract. N0C
  // and N1C are used below without a visible declaration; presumably they are
  // the ConstantSDNode views of N0 and N1 -- restore from the upstream source.
1648  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
  // NOTE(review): listing line 1649 (the other arm of the conditional above)
  // is absent from this extract; N1C1 is later used as the constant C1 in the
  // C2-(A+C1) fold -- restore from the upstream source.
1650  EVT VT = N0.getValueType();
1651 
1652  // fold vector ops
1653  if (VT.isVector()) {
1654  SDValue FoldedVOp = SimplifyVBinOp(N);
1655  if (FoldedVOp.getNode()) return FoldedVOp;
1656 
1657  // fold (sub x, 0) -> x, vector edition
  // NOTE(review): listing line 1658 (the condition guarding the return below)
  // is absent from this extract -- restore from the upstream source.
1659  return N0;
1660  }
1661 
1662  // fold (sub x, x) -> 0
1663  // FIXME: Refactor this and xor and other similar operations together.
1664  if (N0 == N1)
1665  return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
1666  // fold (sub c1, c2) -> c1-c2
1667  if (N0C && N1C)
1668  return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
1669  // fold (sub x, c) -> (add x, -c)
1670  if (N1C)
1671  return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
1672  DAG.getConstant(-N1C->getAPIntValue(), VT));
1673  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
1674  if (N0C && N0C->isAllOnesValue())
1675  return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
1676  // fold A-(A-B) -> B
1677  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
1678  return N1.getOperand(1);
1679  // fold (A+B)-A -> B
1680  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
1681  return N0.getOperand(1);
1682  // fold (A+B)-B -> A
1683  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
1684  return N0.getOperand(0);
1685  // fold C2-(A+C1) -> (C2-C1)-A
1686  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
1687  SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
1688  VT);
1689  return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
1690  N1.getOperand(0));
1691  }
1692  // fold ((A+(B+or-C))-B) -> A+or-C
1693  if (N0.getOpcode() == ISD::ADD &&
1694  (N0.getOperand(1).getOpcode() == ISD::SUB ||
1695  N0.getOperand(1).getOpcode() == ISD::ADD) &&
1696  N0.getOperand(1).getOperand(0) == N1)
1697  return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
1698  N0.getOperand(0), N0.getOperand(1).getOperand(1));
1699  // fold ((A+(C+B))-B) -> A+C
1700  if (N0.getOpcode() == ISD::ADD &&
1701  N0.getOperand(1).getOpcode() == ISD::ADD &&
1702  N0.getOperand(1).getOperand(1) == N1)
1703  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
1704  N0.getOperand(0), N0.getOperand(1).getOperand(0));
1705  // fold ((A-(B-C))-C) -> A-B
1706  if (N0.getOpcode() == ISD::SUB &&
1707  N0.getOperand(1).getOpcode() == ISD::SUB &&
1708  N0.getOperand(1).getOperand(1) == N1)
1709  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1710  N0.getOperand(0), N0.getOperand(1).getOperand(0));
1711 
1712  // If either operand of a sub is undef, the result is undef
1713  if (N0.getOpcode() == ISD::UNDEF)
1714  return N0;
1715  if (N1.getOpcode() == ISD::UNDEF)
1716  return N1;
1717 
1718  // If the relocation model supports it, consider symbol offsets.
1719  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
1720  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
1721  // fold (sub Sym, c) -> Sym-c
1722  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
1723  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
1724  GA->getOffset() -
1725  (uint64_t)N1C->getSExtValue());
1726  // fold (sub Sym+c1, Sym+c2) -> c1-c2
1727  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
1728  if (GA->getGlobal() == GB->getGlobal())
1729  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
1730  VT);
1731  }
1732 
1733  return SDValue();
1734 }
1735 
/// visitSUBC - Simplify a SUBC node (value 0 is the difference, value 1 the
/// borrow flag). Every fold here replaces the flag result with a CARRY_FALSE
/// node of type MVT::Glue. Returns a null SDValue when no fold applies.
1736 SDValue DAGCombiner::visitSUBC(SDNode *N) {
1737  SDValue N0 = N->getOperand(0);
1738  SDValue N1 = N->getOperand(1);
  // NOTE(review): listing lines 1739-1740 are absent from this extract. N0C
  // and N1C are used below without a visible declaration; presumably they are
  // the ConstantSDNode views of N0 and N1 -- restore from the upstream source.
1741  EVT VT = N0.getValueType();
1742 
1743  // If the flag result is dead, turn this into a SUB.
1744  if (!N->hasAnyUseOfValue(1))
1745  return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
1746  DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1747  MVT::Glue));
1748 
1749  // fold (subc x, x) -> 0 + no borrow
1750  if (N0 == N1)
1751  return CombineTo(N, DAG.getConstant(0, VT),
1752  DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1753  MVT::Glue));
1754 
1755  // fold (subc x, 0) -> x + no borrow
1756  if (N1C && N1C->isNullValue())
1757  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1758  MVT::Glue));
1759 
1760  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
1761  if (N0C && N0C->isAllOnesValue())
1762  return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
1763  DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1764  MVT::Glue));
1765 
1766  return SDValue();
1767 }
1768 
1769 SDValue DAGCombiner::visitSUBE(SDNode *N) {
1770  SDValue N0 = N->getOperand(0);
1771  SDValue N1 = N->getOperand(1);
1772  SDValue CarryIn = N->getOperand(2);
1773 
1774  // fold (sube x, y, false) -> (subc x, y)
1775  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1776  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
1777 
1778  return SDValue();
1779 }
1780 
1781 /// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
1782 /// elements are all the same constant or undefined.
1783 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
1785  if (!C)
1786  return false;
1787 
1788  APInt SplatUndef;
1789  unsigned SplatBitSize;
1790  bool HasAnyUndefs;
1791  EVT EltVT = N->getValueType(0).getVectorElementType();
1792  return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
1793  HasAnyUndefs) &&
1794  EltVT.getSizeInBits() >= SplatBitSize);
1795 }
1796 
1797 SDValue DAGCombiner::visitMUL(SDNode *N) {
1798  SDValue N0 = N->getOperand(0);
1799  SDValue N1 = N->getOperand(1);
1800  EVT VT = N0.getValueType();
1801 
1802  // fold (mul x, undef) -> 0
1803  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
1804  return DAG.getConstant(0, VT);
1805 
1806  bool N0IsConst = false;
1807  bool N1IsConst = false;
1808  APInt ConstValue0, ConstValue1;
1809  // fold vector ops
1810  if (VT.isVector()) {
1811  SDValue FoldedVOp = SimplifyVBinOp(N);
1812  if (FoldedVOp.getNode()) return FoldedVOp;
1813 
1814  N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
1815  N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
1816  } else {
1817  N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
1818  ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
1819  : APInt();
1820  N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
1821  ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
1822  : APInt();
1823  }
1824 
1825  // fold (mul c1, c2) -> c1*c2
1826  if (N0IsConst && N1IsConst)
1827  return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
1828 
1829  // canonicalize constant to RHS
1830  if (N0IsConst && !N1IsConst)
1831  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
1832  // fold (mul x, 0) -> 0
1833  if (N1IsConst && ConstValue1 == 0)
1834  return N1;
1835  // We require a splat of the entire scalar bit width for non-contiguous
1836  // bit patterns.
1837  bool IsFullSplat =
1838  ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
1839  // fold (mul x, 1) -> x
1840  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
1841  return N0;
1842  // fold (mul x, -1) -> 0-x
1843  if (N1IsConst && ConstValue1.isAllOnesValue())
1844  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1845  DAG.getConstant(0, VT), N0);
1846  // fold (mul x, (1 << c)) -> x << c
1847  if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
1848  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
1849  DAG.getConstant(ConstValue1.logBase2(),
1851  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
1852  if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
1853  unsigned Log2Val = (-ConstValue1).logBase2();
1854  // FIXME: If the input is something that is easily negated (e.g. a
1855  // single-use add), we should put the negate there.
1856  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1857  DAG.getConstant(0, VT),
1858  DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
1859  DAG.getConstant(Log2Val,
1860  getShiftAmountTy(N0.getValueType()))));
1861  }
1862 
1863  APInt Val;
1864  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
1865  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
1866  (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
1867  isa<ConstantSDNode>(N0.getOperand(1)))) {
1868  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
1869  N1, N0.getOperand(1));
1870  AddToWorkList(C3.getNode());
1871  return DAG.getNode(ISD::MUL, SDLoc(N), VT,
1872  N0.getOperand(0), C3);
1873  }
1874 
1875  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
1876  // use.
1877  {
1878  SDValue Sh(0,0), Y(0,0);
1879  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
1880  if (N0.getOpcode() == ISD::SHL &&
1881  (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
1882  isa<ConstantSDNode>(N0.getOperand(1))) &&
1883  N0.getNode()->hasOneUse()) {
1884  Sh = N0; Y = N1;
1885  } else if (N1.getOpcode() == ISD::SHL &&
1886  isa<ConstantSDNode>(N1.getOperand(1)) &&
1887  N1.getNode()->hasOneUse()) {
1888  Sh = N1; Y = N0;
1889  }
1890 
1891  if (Sh.getNode()) {
1892  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
1893  Sh.getOperand(0), Y);
1894  return DAG.getNode(ISD::SHL, SDLoc(N), VT,
1895  Mul, Sh.getOperand(1));
1896  }
1897  }
1898 
1899  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
1900  if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
1901  (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
1902  isa<ConstantSDNode>(N0.getOperand(1))))
1903  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
1904  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
1905  N0.getOperand(0), N1),
1906  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
1907  N0.getOperand(1), N1));
1908 
1909  // reassociate mul
1910  SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
1911  if (RMUL.getNode() != 0)
1912  return RMUL;
1913 
1914  return SDValue();
1915 }
1916 
1917 SDValue DAGCombiner::visitSDIV(SDNode *N) {
1918  SDValue N0 = N->getOperand(0);
1919  SDValue N1 = N->getOperand(1);
1922  EVT VT = N->getValueType(0);
1923 
1924  // fold vector ops
1925  if (VT.isVector()) {
1926  SDValue FoldedVOp = SimplifyVBinOp(N);
1927  if (FoldedVOp.getNode()) return FoldedVOp;
1928  }
1929 
1930  // fold (sdiv c1, c2) -> c1/c2
1931  if (N0C && N1C && !N1C->isNullValue())
1932  return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
1933  // fold (sdiv X, 1) -> X
1934  if (N1C && N1C->getAPIntValue() == 1LL)
1935  return N0;
1936  // fold (sdiv X, -1) -> 0-X
1937  if (N1C && N1C->isAllOnesValue())
1938  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1939  DAG.getConstant(0, VT), N0);
1940  // If we know the sign bits of both operands are zero, strength reduce to a
1941  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
1942  if (!VT.isVector()) {
1943  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
1944  return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
1945  N0, N1);
1946  }
1947  // fold (sdiv X, pow2) -> simple ops after legalize
1948  if (N1C && !N1C->isNullValue() &&
1949  (N1C->getAPIntValue().isPowerOf2() ||
1950  (-N1C->getAPIntValue()).isPowerOf2())) {
1951  // If dividing by powers of two is cheap, then don't perform the following
1952  // fold.
1953  if (TLI.isPow2DivCheap())
1954  return SDValue();
1955 
1956  unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
1957 
1958  // Splat the sign bit into the register
1959  SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
1960  DAG.getConstant(VT.getSizeInBits()-1,
1962  AddToWorkList(SGN.getNode());
1963 
1964  // Add (N0 < 0) ? abs2 - 1 : 0;
1965  SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
1966  DAG.getConstant(VT.getSizeInBits() - lg2,
1967  getShiftAmountTy(SGN.getValueType())));
1968  SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
1969  AddToWorkList(SRL.getNode());
1970  AddToWorkList(ADD.getNode()); // Divide by pow2
1971  SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
1972  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
1973 
1974  // If we're dividing by a positive value, we're done. Otherwise, we must
1975  // negate the result.
1976  if (N1C->getAPIntValue().isNonNegative())
1977  return SRA;
1978 
1979  AddToWorkList(SRA.getNode());
1980  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1981  DAG.getConstant(0, VT), SRA);
1982  }
1983 
1984  // if integer divide is expensive and we satisfy the requirements, emit an
1985  // alternate sequence.
1986  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
1987  SDValue Op = BuildSDIV(N);
1988  if (Op.getNode()) return Op;
1989  }
1990 
1991  // undef / X -> 0
1992  if (N0.getOpcode() == ISD::UNDEF)
1993  return DAG.getConstant(0, VT);
1994  // X / undef -> undef
1995  if (N1.getOpcode() == ISD::UNDEF)
1996  return N1;
1997 
1998  return SDValue();
1999 }
2000 
2001 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2002  SDValue N0 = N->getOperand(0);
2003  SDValue N1 = N->getOperand(1);
2006  EVT VT = N->getValueType(0);
2007 
2008  // fold vector ops
2009  if (VT.isVector()) {
2010  SDValue FoldedVOp = SimplifyVBinOp(N);
2011  if (FoldedVOp.getNode()) return FoldedVOp;
2012  }
2013 
2014  // fold (udiv c1, c2) -> c1/c2
2015  if (N0C && N1C && !N1C->isNullValue())
2016  return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
2017  // fold (udiv x, (1 << c)) -> x >>u c
2018  if (N1C && N1C->getAPIntValue().isPowerOf2())
2019  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
2020  DAG.getConstant(N1C->getAPIntValue().logBase2(),
2022  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2023  if (N1.getOpcode() == ISD::SHL) {
2024  if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
2025  if (SHC->getAPIntValue().isPowerOf2()) {
2026  EVT ADDVT = N1.getOperand(1).getValueType();
2027  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
2028  N1.getOperand(1),
2029  DAG.getConstant(SHC->getAPIntValue()
2030  .logBase2(),
2031  ADDVT));
2032  AddToWorkList(Add.getNode());
2033  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
2034  }
2035  }
2036  }
2037  // fold (udiv x, c) -> alternate
2038  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
2039  SDValue Op = BuildUDIV(N);
2040  if (Op.getNode()) return Op;
2041  }
2042 
2043  // undef / X -> 0
2044  if (N0.getOpcode() == ISD::UNDEF)
2045  return DAG.getConstant(0, VT);
2046  // X / undef -> undef
2047  if (N1.getOpcode() == ISD::UNDEF)
2048  return N1;
2049 
2050  return SDValue();
2051 }
2052 
2053 SDValue DAGCombiner::visitSREM(SDNode *N) {
2054  SDValue N0 = N->getOperand(0);
2055  SDValue N1 = N->getOperand(1);
2058  EVT VT = N->getValueType(0);
2059 
2060  // fold (srem c1, c2) -> c1%c2
2061  if (N0C && N1C && !N1C->isNullValue())
2062  return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
2063  // If we know the sign bits of both operands are zero, strength reduce to a
2064  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2065  if (!VT.isVector()) {
2066  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2067  return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
2068  }
2069 
2070  // If X/C can be simplified by the division-by-constant logic, lower
2071  // X%C to the equivalent of X-X/C*C.
2072  if (N1C && !N1C->isNullValue()) {
2073  SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
2074  AddToWorkList(Div.getNode());
2075  SDValue OptimizedDiv = combine(Div.getNode());
2076  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2077  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
2078  OptimizedDiv, N1);
2079  SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
2080  AddToWorkList(Mul.getNode());
2081  return Sub;
2082  }
2083  }
2084 
2085  // undef % X -> 0
2086  if (N0.getOpcode() == ISD::UNDEF)
2087  return DAG.getConstant(0, VT);
2088  // X % undef -> undef
2089  if (N1.getOpcode() == ISD::UNDEF)
2090  return N1;
2091 
2092  return SDValue();
2093 }
2094 
2095 SDValue DAGCombiner::visitUREM(SDNode *N) {
2096  SDValue N0 = N->getOperand(0);
2097  SDValue N1 = N->getOperand(1);
2100  EVT VT = N->getValueType(0);
2101 
2102  // fold (urem c1, c2) -> c1%c2
2103  if (N0C && N1C && !N1C->isNullValue())
2104  return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
2105  // fold (urem x, pow2) -> (and x, pow2-1)
2106  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
2107  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
2108  DAG.getConstant(N1C->getAPIntValue()-1,VT));
2109  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2110  if (N1.getOpcode() == ISD::SHL) {
2111  if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
2112  if (SHC->getAPIntValue().isPowerOf2()) {
2113  SDValue Add =
2114  DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
2115  DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
2116  VT));
2117  AddToWorkList(Add.getNode());
2118  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
2119  }
2120  }
2121  }
2122 
2123  // If X/C can be simplified by the division-by-constant logic, lower
2124  // X%C to the equivalent of X-X/C*C.
2125  if (N1C && !N1C->isNullValue()) {
2126  SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
2127  AddToWorkList(Div.getNode());
2128  SDValue OptimizedDiv = combine(Div.getNode());
2129  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2130  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
2131  OptimizedDiv, N1);
2132  SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
2133  AddToWorkList(Mul.getNode());
2134  return Sub;
2135  }
2136  }
2137 
2138  // undef % X -> 0
2139  if (N0.getOpcode() == ISD::UNDEF)
2140  return DAG.getConstant(0, VT);
2141  // X % undef -> undef
2142  if (N1.getOpcode() == ISD::UNDEF)
2143  return N1;
2144 
2145  return SDValue();
2146 }
2147 
2148 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2149  SDValue N0 = N->getOperand(0);
2150  SDValue N1 = N->getOperand(1);
2152  EVT VT = N->getValueType(0);
2153  SDLoc DL(N);
2154 
2155  // fold (mulhs x, 0) -> 0
2156  if (N1C && N1C->isNullValue())
2157  return N1;
2158  // fold (mulhs x, 1) -> (sra x, size(x)-1)
2159  if (N1C && N1C->getAPIntValue() == 1)
2160  return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
2161  DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
2163  // fold (mulhs x, undef) -> 0
2164  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2165  return DAG.getConstant(0, VT);
2166 
2167  // If the type twice as wide is legal, transform the mulhs to a wider multiply
2168  // plus a shift.
2169  if (VT.isSimple() && !VT.isVector()) {
2170  MVT Simple = VT.getSimpleVT();
2171  unsigned SimpleSize = Simple.getSizeInBits();
2172  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2173  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2174  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2175  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2176  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2177  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2178  DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
2179  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2180  }
2181  }
2182 
2183  return SDValue();
2184 }
2185 
2186 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2187  SDValue N0 = N->getOperand(0);
2188  SDValue N1 = N->getOperand(1);
2190  EVT VT = N->getValueType(0);
2191  SDLoc DL(N);
2192 
2193  // fold (mulhu x, 0) -> 0
2194  if (N1C && N1C->isNullValue())
2195  return N1;
2196  // fold (mulhu x, 1) -> 0
2197  if (N1C && N1C->getAPIntValue() == 1)
2198  return DAG.getConstant(0, N0.getValueType());
2199  // fold (mulhu x, undef) -> 0
2200  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2201  return DAG.getConstant(0, VT);
2202 
2203  // If the type twice as wide is legal, transform the mulhu to a wider multiply
2204  // plus a shift.
2205  if (VT.isSimple() && !VT.isVector()) {
2206  MVT Simple = VT.getSimpleVT();
2207  unsigned SimpleSize = Simple.getSizeInBits();
2208  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2209  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2210  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2211  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2212  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2213  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2214  DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
2215  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2216  }
2217  }
2218 
2219  return SDValue();
2220 }
2221 
2222 /// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
2223 /// compute two values. LoOp and HiOp give the opcodes for the two computations
2224 /// that are being performed. Return true if a simplification was made.
2225 ///
2226 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
2227  unsigned HiOp) {
2228  // If the high half is not needed, just compute the low half.
2229  bool HiExists = N->hasAnyUseOfValue(1);
2230  if (!HiExists &&
2231  (!LegalOperations ||
2232  TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
2233  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
2234  N->op_begin(), N->getNumOperands());
2235  return CombineTo(N, Res, Res);
2236  }
2237 
2238  // If the low half is not needed, just compute the high half.
2239  bool LoExists = N->hasAnyUseOfValue(0);
2240  if (!LoExists &&
2241  (!LegalOperations ||
2242  TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
2243  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
2244  N->op_begin(), N->getNumOperands());
2245  return CombineTo(N, Res, Res);
2246  }
2247 
2248  // If both halves are used, return as it is.
2249  if (LoExists && HiExists)
2250  return SDValue();
2251 
2252  // If the two computed results can be simplified separately, separate them.
2253  if (LoExists) {
2254  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
2255  N->op_begin(), N->getNumOperands());
2256  AddToWorkList(Lo.getNode());
2257  SDValue LoOpt = combine(Lo.getNode());
2258  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
2259  (!LegalOperations ||
2260  TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
2261  return CombineTo(N, LoOpt, LoOpt);
2262  }
2263 
2264  if (HiExists) {
2265  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
2266  N->op_begin(), N->getNumOperands());
2267  AddToWorkList(Hi.getNode());
2268  SDValue HiOpt = combine(Hi.getNode());
2269  if (HiOpt.getNode() && HiOpt != Hi &&
2270  (!LegalOperations ||
2271  TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
2272  return CombineTo(N, HiOpt, HiOpt);
2273  }
2274 
2275  return SDValue();
2276 }
2277 
2278 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2279  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
2280  if (Res.getNode()) return Res;
2281 
2282  EVT VT = N->getValueType(0);
2283  SDLoc DL(N);
2284 
2285  // If the type twice as wide is legal, transform the mulhu to a wider multiply
2286  // plus a shift.
2287  if (VT.isSimple() && !VT.isVector()) {
2288  MVT Simple = VT.getSimpleVT();
2289  unsigned SimpleSize = Simple.getSizeInBits();
2290  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2291  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2292  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2293  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2294  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2295  // Compute the high part as N1.
2296  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2297  DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
2298  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2299  // Compute the low part as N0.
2300  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2301  return CombineTo(N, Lo, Hi);
2302  }
2303  }
2304 
2305  return SDValue();
2306 }
2307 
2308 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2309  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
2310  if (Res.getNode()) return Res;
2311 
2312  EVT VT = N->getValueType(0);
2313  SDLoc DL(N);
2314 
2315  // If the type twice as wide is legal, transform the mulhu to a wider multiply
2316  // plus a shift.
2317  if (VT.isSimple() && !VT.isVector()) {
2318  MVT Simple = VT.getSimpleVT();
2319  unsigned SimpleSize = Simple.getSizeInBits();
2320  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2321  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2322  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2323  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2324  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2325  // Compute the high part as N1.
2326  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2327  DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
2328  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2329  // Compute the low part as N0.
2330  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2331  return CombineTo(N, Lo, Hi);
2332  }
2333  }
2334 
2335  return SDValue();
2336 }
2337 
2338 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2339  // (smulo x, 2) -> (saddo x, x)
2340  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2341  if (C2->getAPIntValue() == 2)
2342  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2343  N->getOperand(0), N->getOperand(0));
2344 
2345  return SDValue();
2346 }
2347 
2348 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2349  // (umulo x, 2) -> (uaddo x, x)
2350  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2351  if (C2->getAPIntValue() == 2)
2352  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2353  N->getOperand(0), N->getOperand(0));
2354 
2355  return SDValue();
2356 }
2357 
2358 SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
2359  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
2360  if (Res.getNode()) return Res;
2361 
2362  return SDValue();
2363 }
2364 
2365 SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
2366  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
2367  if (Res.getNode()) return Res;
2368 
2369  return SDValue();
2370 }
2371 
2372 /// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
2373 /// two operands of the same opcode, try to simplify it.
2374 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
2375  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2376  EVT VT = N0.getValueType();
2377  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
2378 
2379  // Bail early if none of these transforms apply.
2380  if (N0.getNode()->getNumOperands() == 0) return SDValue();
2381 
2382  // For each of OP in AND/OR/XOR:
2383  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
2384  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
2385  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
2386  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
2387  //
2388  // do not sink logical op inside of a vector extend, since it may combine
2389  // into a vsetcc.
2390  EVT Op0VT = N0.getOperand(0).getValueType();
2391  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
2392  N0.getOpcode() == ISD::SIGN_EXTEND ||
2393  // Avoid infinite looping with PromoteIntBinOp.
2394  (N0.getOpcode() == ISD::ANY_EXTEND &&
2395  (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
2396  (N0.getOpcode() == ISD::TRUNCATE &&
2397  (!TLI.isZExtFree(VT, Op0VT) ||
2398  !TLI.isTruncateFree(Op0VT, VT)) &&
2399  TLI.isTypeLegal(Op0VT))) &&
2400  !VT.isVector() &&
2401  Op0VT == N1.getOperand(0).getValueType() &&
2402  (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
2403  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
2404  N0.getOperand(0).getValueType(),
2405  N0.getOperand(0), N1.getOperand(0));
2406  AddToWorkList(ORNode.getNode());
2407  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
2408  }
2409 
2410  // For each of OP in SHL/SRL/SRA/AND...
2411  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
2412  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
2413  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
2414  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
2415  N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
2416  N0.getOperand(1) == N1.getOperand(1)) {
2417  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
2418  N0.getOperand(0).getValueType(),
2419  N0.getOperand(0), N1.getOperand(0));
2420  AddToWorkList(ORNode.getNode());
2421  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
2422  ORNode, N0.getOperand(1));
2423  }
2424 
2425  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
2426  // Only perform this optimization after type legalization and before
2427  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
2428  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
2429  // we don't want to undo this promotion.
2430  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
2431  // on scalars.
2432  if ((N0.getOpcode() == ISD::BITCAST ||
2433  N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
2435  SDValue In0 = N0.getOperand(0);
2436  SDValue In1 = N1.getOperand(0);
2437  EVT In0Ty = In0.getValueType();
2438  EVT In1Ty = In1.getValueType();
2439  SDLoc DL(N);
2440  // If both incoming values are integers, and the original types are the
2441  // same.
2442  if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
2443  SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
2444  SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
2445  AddToWorkList(Op.getNode());
2446  return BC;
2447  }
2448  }
2449 
2450  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
2451  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
2452  // If both shuffles use the same mask, and both shuffle within a single
2453  // vector, then it is worthwhile to move the swizzle after the operation.
2454  // The type-legalizer generates this pattern when loading illegal
2455  // vector types from memory. In many cases this allows additional shuffle
2456  // optimizations.
2458  N0.getOperand(1).getOpcode() == ISD::UNDEF &&
2459  N1.getOperand(1).getOpcode() == ISD::UNDEF) {
2460  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
2461  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
2462 
2463  assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
2464  "Inputs to shuffles are not the same type");
2465 
2466  unsigned NumElts = VT.getVectorNumElements();
2467 
2468  // Check that both shuffles use the same mask. The masks are known to be of
2469  // the same length because the result vector type is the same.
2470  bool SameMask = true;
2471  for (unsigned i = 0; i != NumElts; ++i) {
2472  int Idx0 = SVN0->getMaskElt(i);
2473  int Idx1 = SVN1->getMaskElt(i);
2474  if (Idx0 != Idx1) {
2475  SameMask = false;
2476  break;
2477  }
2478  }
2479 
2480  if (SameMask) {
2481  SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
2482  N0.getOperand(0), N1.getOperand(0));
2483  AddToWorkList(Op.getNode());
2484  return DAG.getVectorShuffle(VT, SDLoc(N), Op,
2485  DAG.getUNDEF(VT), &SVN0->getMask()[0]);
2486  }
2487  }
2488 
2489  return SDValue();
2490 }
2491 
2492 SDValue DAGCombiner::visitAND(SDNode *N) {
2493  SDValue N0 = N->getOperand(0);
2494  SDValue N1 = N->getOperand(1);
2495  SDValue LL, LR, RL, RR, CC0, CC1;
2498  EVT VT = N1.getValueType();
2499  unsigned BitWidth = VT.getScalarType().getSizeInBits();
2500 
2501  // fold vector ops
2502  if (VT.isVector()) {
2503  SDValue FoldedVOp = SimplifyVBinOp(N);
2504  if (FoldedVOp.getNode()) return FoldedVOp;
2505 
2506  // fold (and x, 0) -> 0, vector edition
2508  return N0;
2510  return N1;
2511 
2512  // fold (and x, -1) -> x, vector edition
2514  return N1;
2516  return N0;
2517  }
2518 
2519  // fold (and x, undef) -> 0
2520  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2521  return DAG.getConstant(0, VT);
2522  // fold (and c1, c2) -> c1&c2
2523  if (N0C && N1C)
2524  return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
2525  // canonicalize constant to RHS
2526  if (N0C && !N1C)
2527  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
2528  // fold (and x, -1) -> x
2529  if (N1C && N1C->isAllOnesValue())
2530  return N0;
2531  // if (and x, c) is known to be zero, return 0
2532  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
2533  APInt::getAllOnesValue(BitWidth)))
2534  return DAG.getConstant(0, VT);
2535  // reassociate and
2536  SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
2537  if (RAND.getNode() != 0)
2538  return RAND;
2539  // fold (and (or x, C), D) -> D if (C & D) == D
2540  if (N1C && N0.getOpcode() == ISD::OR)
2541  if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
2542  if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
2543  return N1;
2544  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
2545  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
2546  SDValue N0Op0 = N0.getOperand(0);
2547  APInt Mask = ~N1C->getAPIntValue();
2548  Mask = Mask.trunc(N0Op0.getValueSizeInBits());
2549  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
2550  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
2551  N0.getValueType(), N0Op0);
2552 
2553  // Replace uses of the AND with uses of the Zero extend node.
2554  CombineTo(N, Zext);
2555 
2556  // We actually want to replace all uses of the any_extend with the
2557  // zero_extend, to avoid duplicating things. This will later cause this
2558  // AND to be folded.
2559  CombineTo(N0.getNode(), Zext);
2560  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2561  }
2562  }
2563  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
2564  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
2565  // already be zero by virtue of the width of the base type of the load.
2566  //
2567  // the 'X' node here can either be nothing or an extract_vector_elt to catch
2568  // more cases.
2569  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
2570  N0.getOperand(0).getOpcode() == ISD::LOAD) ||
2571  N0.getOpcode() == ISD::LOAD) {
2572  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
2573  N0 : N0.getOperand(0) );
2574 
2575  // Get the constant (if applicable) the zero'th operand is being ANDed with.
2576  // This can be a pure constant or a vector splat, in which case we treat the
2577  // vector as a scalar and use the splat value.
2579  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
2580  Constant = C->getAPIntValue();
2581  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
2582  APInt SplatValue, SplatUndef;
2583  unsigned SplatBitSize;
2584  bool HasAnyUndefs;
2585  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
2586  SplatBitSize, HasAnyUndefs);
2587  if (IsSplat) {
2588  // Undef bits can contribute to a possible optimisation if set, so
2589  // set them.
2590  SplatValue |= SplatUndef;
2591 
2592  // The splat value may be something like "0x00FFFFFF", which means 0 for
2593  // the first vector value and FF for the rest, repeating. We need a mask
2594  // that will apply equally to all members of the vector, so AND all the
2595  // lanes of the constant together.
2596  EVT VT = Vector->getValueType(0);
2597  unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
2598 
2599  // If the splat value has been compressed to a bitlength lower
2600  // than the size of the vector lane, we need to re-expand it to
2601  // the lane size.
2602  if (BitWidth > SplatBitSize)
2603  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
2604  SplatBitSize < BitWidth;
2605  SplatBitSize = SplatBitSize * 2)
2606  SplatValue |= SplatValue.shl(SplatBitSize);
2607 
2608  Constant = APInt::getAllOnesValue(BitWidth);
2609  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
2610  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
2611  }
2612  }
2613 
2614  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
2615  // actually legal and isn't going to get expanded, else this is a false
2616  // optimisation.
2617  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
2618  Load->getMemoryVT());
2619 
2620  // Resize the constant to the same size as the original memory access before
2621  // extension. If it is still the AllOnesValue then this AND is completely
2622  // unneeded.
2623  Constant =
2624  Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
2625 
2626  bool B;
2627  switch (Load->getExtensionType()) {
2628  default: B = false; break;
2629  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
2630  case ISD::ZEXTLOAD:
2631  case ISD::NON_EXTLOAD: B = true; break;
2632  }
2633 
2634  if (B && Constant.isAllOnesValue()) {
2635  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
2636  // preserve semantics once we get rid of the AND.
2637  SDValue NewLoad(Load, 0);
2638  if (Load->getExtensionType() == ISD::EXTLOAD) {
2639  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
2640  Load->getValueType(0), SDLoc(Load),
2641  Load->getChain(), Load->getBasePtr(),
2642  Load->getOffset(), Load->getMemoryVT(),
2643  Load->getMemOperand());
2644  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
2645  if (Load->getNumValues() == 3) {
2646  // PRE/POST_INC loads have 3 values.
2647  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
2648  NewLoad.getValue(2) };
2649  CombineTo(Load, To, 3, true);
2650  } else {
2651  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
2652  }
2653  }
2654 
2655  // Fold the AND away, taking care not to fold to the old load node if we
2656  // replaced it.
2657  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
2658 
2659  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2660  }
2661  }
2662  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
2663  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
2664  ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
2665  ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
2666 
2667  if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
2668  LL.getValueType().isInteger()) {
2669  // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
2670  if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
2671  SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2672  LR.getValueType(), LL, RL);
2673  AddToWorkList(ORNode.getNode());
2674  return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
2675  }
2676  // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
2677  if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
2678  SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
2679  LR.getValueType(), LL, RL);
2680  AddToWorkList(ANDNode.getNode());
2681  return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
2682  }
2683  // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
2684  if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
2685  SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2686  LR.getValueType(), LL, RL);
2687  AddToWorkList(ORNode.getNode());
2688  return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
2689  }
2690  }
2691  // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
2692  if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
2693  Op0 == Op1 && LL.getValueType().isInteger() &&
2694  Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
2695  cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
2696  (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
2697  cast<ConstantSDNode>(RR)->isNullValue()))) {
2698  SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
2699  LL, DAG.getConstant(1, LL.getValueType()));
2700  AddToWorkList(ADDNode.getNode());
2701  return DAG.getSetCC(SDLoc(N), VT, ADDNode,
2702  DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
2703  }
2704  // canonicalize equivalent to ll == rl
2705  if (LL == RR && LR == RL) {
2706  Op1 = ISD::getSetCCSwappedOperands(Op1);
2707  std::swap(RL, RR);
2708  }
2709  if (LL == RL && LR == RR) {
2710  bool isInteger = LL.getValueType().isInteger();
2711  ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
2712  if (Result != ISD::SETCC_INVALID &&
2713  (!LegalOperations ||
2714  (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
2715  TLI.isOperationLegal(ISD::SETCC,
2717  return DAG.getSetCC(SDLoc(N), N0.getValueType(),
2718  LL, LR, Result);
2719  }
2720  }
2721 
2722  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
2723  if (N0.getOpcode() == N1.getOpcode()) {
2724  SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
2725  if (Tmp.getNode()) return Tmp;
2726  }
2727 
2728  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
2729  // fold (and (sra)) -> (and (srl)) when possible.
2730  if (!VT.isVector() &&
2732  return SDValue(N, 0);
2733 
2734  // fold (zext_inreg (extload x)) -> (zextload x)
2735  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
2736  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
2737  EVT MemVT = LN0->getMemoryVT();
2738  // If we zero all the possible extended bits, then we can turn this into
2739  // a zextload if we are running before legalize or the operation is legal.
2740  unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
2741  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
2742  BitWidth - MemVT.getScalarType().getSizeInBits())) &&
2743  ((!LegalOperations && !LN0->isVolatile()) ||
2744  TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
2745  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
2746  LN0->getChain(), LN0->getBasePtr(),
2747  MemVT, LN0->getMemOperand());
2748  AddToWorkList(N);
2749  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
2750  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2751  }
2752  }
2753  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
2754  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
2755  N0.hasOneUse()) {
2756  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
2757  EVT MemVT = LN0->getMemoryVT();
2758  // If we zero all the possible extended bits, then we can turn this into
2759  // a zextload if we are running before legalize or the operation is legal.
2760  unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
2761  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
2762  BitWidth - MemVT.getScalarType().getSizeInBits())) &&
2763  ((!LegalOperations && !LN0->isVolatile()) ||
2764  TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
2765  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
2766  LN0->getChain(), LN0->getBasePtr(),
2767  MemVT, LN0->getMemOperand());
2768  AddToWorkList(N);
2769  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
2770  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2771  }
2772  }
2773 
2774  // fold (and (load x), 255) -> (zextload x, i8)
2775  // fold (and (extload x, i16), 255) -> (zextload x, i8)
2776  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
2777  if (N1C && (N0.getOpcode() == ISD::LOAD ||
2778  (N0.getOpcode() == ISD::ANY_EXTEND &&
2779  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
2780  bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
2781  LoadSDNode *LN0 = HasAnyExt
2782  ? cast<LoadSDNode>(N0.getOperand(0))
2783  : cast<LoadSDNode>(N0);
2784  if (LN0->getExtensionType() != ISD::SEXTLOAD &&
2785  LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
2786  uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
2787  if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
2788  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
2789  EVT LoadedVT = LN0->getMemoryVT();
2790 
2791  if (ExtVT == LoadedVT &&
2792  (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
2793  EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
2794 
2795  SDValue NewLoad =
2796  DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
2797  LN0->getChain(), LN0->getBasePtr(), ExtVT,
2798  LN0->getMemOperand());
2799  AddToWorkList(N);
2800  CombineTo(LN0, NewLoad, NewLoad.getValue(1));
2801  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2802  }
2803 
2804  // Do not change the width of a volatile load.
2805  // Do not generate loads of non-round integer types since these can
2806  // be expensive (and would be wrong if the type is not byte sized).
2807  if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
2808  (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
2809  EVT PtrType = LN0->getOperand(1).getValueType();
2810 
2811  unsigned Alignment = LN0->getAlignment();
2812  SDValue NewPtr = LN0->getBasePtr();
2813 
2814  // For big endian targets, we need to add an offset to the pointer
2815  // to load the correct bytes. For little endian systems, we merely
2816  // need to read fewer bytes from the same pointer.
2817  if (TLI.isBigEndian()) {
2818  unsigned LVTStoreBytes = LoadedVT.getStoreSize();
2819  unsigned EVTStoreBytes = ExtVT.getStoreSize();
2820  unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
2821  NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
2822  NewPtr, DAG.getConstant(PtrOff, PtrType));
2823  Alignment = MinAlign(Alignment, PtrOff);
2824  }
2825 
2826  AddToWorkList(NewPtr.getNode());
2827 
2828  EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
2829  SDValue Load =
2830  DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
2831  LN0->getChain(), NewPtr,
2832  LN0->getPointerInfo(),
2833  ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
2834  Alignment, LN0->getTBAAInfo());
2835  AddToWorkList(N);
2836  CombineTo(LN0, Load, Load.getValue(1));
2837  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2838  }
2839  }
2840  }
2841  }
2842 
2843  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
2844  VT.getSizeInBits() <= 64) {
2845  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2846  APInt ADDC = ADDI->getAPIntValue();
2847  if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
2848  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
2849  // immediate for an add, but it is legal if its top c2 bits are set,
2850  // transform the ADD so the immediate doesn't need to be materialized
2851  // in a register.
2852  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
2854  SRLI->getZExtValue());
2855  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
2856  ADDC |= Mask;
2857  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
2858  SDValue NewAdd =
2859  DAG.getNode(ISD::ADD, SDLoc(N0), VT,
2860  N0.getOperand(0), DAG.getConstant(ADDC, VT));
2861  CombineTo(N0.getNode(), NewAdd);
2862  return SDValue(N, 0); // Return N so it doesn't get rechecked!
2863  }
2864  }
2865  }
2866  }
2867  }
2868  }
2869 
2870  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
2871  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
2872  SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
2873  N0.getOperand(1), false);
2874  if (BSwap.getNode())
2875  return BSwap;
2876  }
2877 
2878  return SDValue();
2879 }
2880 
2881 /// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
2882 ///
/// N supplies the result type and the debug location of the new nodes; N0 and
/// N1 are the two values being combined. Returns an empty SDValue when
/// LegalOperations is not set, when the type is not i16/i32/i64, when BSWAP is
/// not legal for the type, or when the operands do not form the pattern.
/// If DemandHighBits is set and the type is wider than 16 bits, the left-shift
/// half must be masked and the right-shift half must either be masked or have
/// all bits above the low halfword known to be zero.
///
/// NOTE(review): this listing omits the lines that declare N01C, N11C, N001C
/// and N101C; below they are used as nullable pointers to constant nodes.
/// Confirm their initializers against the original file.
2883 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
2884  bool DemandHighBits) {
2885  if (!LegalOperations)
2886  return SDValue();
2887 
2888  EVT VT = N->getValueType(0);
2889  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
2890  return SDValue();
2891  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
2892  return SDValue();
2893 
2894  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
2895  bool LookPassAnd0 = false;
2896  bool LookPassAnd1 = false;
  // Reorder so that an AND-of-SRL ends up in N1 and an AND-of-SHL in N0.
2897  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
2898  std::swap(N0, N1);
2899  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
2900  std::swap(N0, N1);
  // Look through a single-use (and ..., 0xFF00) on N0.
2901  if (N0.getOpcode() == ISD::AND) {
2902  if (!N0.getNode()->hasOneUse())
2903  return SDValue();
2905  if (!N01C || N01C->getZExtValue() != 0xFF00)
2906  return SDValue();
2907  N0 = N0.getOperand(0);
2908  LookPassAnd0 = true;
2909  }
2910 
  // Look through a single-use (and ..., 0xFF) on N1.
2911  if (N1.getOpcode() == ISD::AND) {
2912  if (!N1.getNode()->hasOneUse())
2913  return SDValue();
2915  if (!N11C || N11C->getZExtValue() != 0xFF)
2916  return SDValue();
2917  N1 = N1.getOperand(0);
2918  LookPassAnd1 = true;
2919  }
2920 
  // From here on N0 must be the SHL and N1 the SRL, each with a single use.
2921  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
2922  std::swap(N0, N1);
2923  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
2924  return SDValue();
2925  if (!N0.getNode()->hasOneUse() ||
2926  !N1.getNode()->hasOneUse())
2927  return SDValue();
2928 
  // Both values checked here must be constants equal to 8.
2931  if (!N01C || !N11C)
2932  return SDValue();
2933  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
2934  return SDValue();
2935 
2936  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
2937  SDValue N00 = N0->getOperand(0);
2938  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
2939  if (!N00.getNode()->hasOneUse())
2940  return SDValue();
2942  if (!N001C || N001C->getZExtValue() != 0xFF)
2943  return SDValue();
2944  N00 = N00.getOperand(0);
2945  LookPassAnd0 = true;
2946  }
2947 
2948  SDValue N10 = N1->getOperand(0);
2949  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
2950  if (!N10.getNode()->hasOneUse())
2951  return SDValue();
2953  if (!N101C || N101C->getZExtValue() != 0xFF00)
2954  return SDValue();
2955  N10 = N10.getOperand(0);
2956  LookPassAnd1 = true;
2957  }
2958 
  // Both shifts must operate on the same value.
2959  if (N00 != N10)
2960  return SDValue();
2961 
2962  // Make sure everything beyond the low halfword gets set to zero since the SRL
2963  // 16 will clear the top bits.
2964  unsigned OpSizeInBits = VT.getSizeInBits();
2965  if (DemandHighBits && OpSizeInBits > 16) {
2966  // If the left-shift isn't masked out then the only way this is a bswap is
2967  // if all bits beyond the low 8 are 0. In that case the entire pattern
2968  // reduces to a left shift anyway: leave it for other parts of the combiner.
2969  if (!LookPassAnd0)
2970  return SDValue();
2971 
2972  // However, if the right shift isn't masked out then it might be because
2973  // it's not needed. See if we can spot that too.
2974  if (!LookPassAnd1 &&
2975  !DAG.MaskedValueIsZero(
2976  N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
2977  return SDValue();
2978  }
2979 
  // Build (bswap a); for types wider than 16 bits, shift the result right by
  // (width - 16).
2980  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
2981  if (OpSizeInBits > 16)
2982  Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
2983  DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
2984  return Res;
2985 }
2986 
2987 /// isBSwapHWordElement - Return true if the specified node is an element
2988 /// that makes up a 32-bit packed halfword byteswap. i.e.
2989 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
///
/// On success, the node feeding the inner operation is recorded in the Parts
/// slot selected by the element's byte mask. Returns false if that slot has
/// already been filled, if the node has more than one use, or if the node does
/// not have one of the accepted shapes.
///
/// NOTE(review): the signature line and the lines declaring N1C and C are
/// missing from this listing. The body uses N as the candidate value, Parts as
/// an indexable collection of node pointers, and N1C / C as nullable pointers
/// to constant nodes; confirm against the original file.
2991  if (!N.getNode()->hasOneUse())
2992  return false;
2993 
2994  unsigned Opc = N.getOpcode();
2995  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
2996  return false;
2997 
2999  if (!N1C)
3000  return false;
3001 
  // Translate the single-byte mask into the index (0-3) of the byte it covers.
3002  unsigned Num;
3003  switch (N1C->getZExtValue()) {
3004  default:
3005  return false;
3006  case 0xFF: Num = 0; break;
3007  case 0xFF00: Num = 1; break;
3008  case 0xFF0000: Num = 2; break;
3009  case 0xFF000000: Num = 3; break;
3010  }
3011 
3012  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3013  SDValue N0 = N.getOperand(0);
3014  if (Opc == ISD::AND) {
3015  if (Num == 0 || Num == 2) {
3016  // (x >> 8) & 0xff
3017  // (x >> 8) & 0xff0000
3018  if (N0.getOpcode() != ISD::SRL)
3019  return false;
3021  if (!C || C->getZExtValue() != 8)
3022  return false;
3023  } else {
3024  // (x << 8) & 0xff00
3025  // (x << 8) & 0xff000000
3026  if (N0.getOpcode() != ISD::SHL)
3027  return false;
3029  if (!C || C->getZExtValue() != 8)
3030  return false;
3031  }
3032  } else if (Opc == ISD::SHL) {
3033  // (x & 0xff) << 8
3034  // (x & 0xff0000) << 8
3035  if (Num != 0 && Num != 2)
3036  return false;
3038  if (!C || C->getZExtValue() != 8)
3039  return false;
3040  } else { // Opc == ISD::SRL
3041  // (x & 0xff00) >> 8
3042  // (x & 0xff000000) >> 8
3043  if (Num != 1 && Num != 3)
3044  return false;
3046  if (!C || C->getZExtValue() != 8)
3047  return false;
3048  }
3049 
  // Each byte position may be claimed by only one element.
3050  if (Parts[Num])
3051  return false;
3052 
3053  Parts[Num] = N0.getOperand(0).getNode();
3054  return true;
3055 }
3056 
3057 /// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
3058 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
3059 /// => (rotl (bswap x), 16)
3060 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
3061  if (!LegalOperations)
3062  return SDValue();
3063 
3064  EVT VT = N->getValueType(0);
3065  if (VT != MVT::i32)
3066  return SDValue();
3067  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3068  return SDValue();
3069 
3070  SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
3071  // Look for either
3072  // (or (or (and), (and)), (or (and), (and)))
3073  // (or (or (or (and), (and)), (and)), (and))
3074  if (N0.getOpcode() != ISD::OR)
3075  return SDValue();
3076  SDValue N00 = N0.getOperand(0);
3077  SDValue N01 = N0.getOperand(1);
3078 
3079  if (N1.getOpcode() == ISD::OR &&
3080  N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
3081  // (or (or (and), (and)), (or (and), (and)))
3082  SDValue N000 = N00.getOperand(0);
3083  if (!isBSwapHWordElement(N000, Parts))
3084  return SDValue();
3085 
3086  SDValue N001 = N00.getOperand(1);
3087  if (!isBSwapHWordElement(N001, Parts))
3088  return SDValue();
3089  SDValue N010 = N01.getOperand(0);
3090  if (!isBSwapHWordElement(N010, Parts))
3091  return SDValue();
3092  SDValue N011 = N01.getOperand(1);
3093  if (!isBSwapHWordElement(N011, Parts))
3094  return SDValue();
3095  } else {
3096  // (or (or (or (and), (and)), (and)), (and))
3097  if (!isBSwapHWordElement(N1, Parts))
3098  return SDValue();
3099  if (!isBSwapHWordElement(N01, Parts))
3100  return SDValue();
3101  if (N00.getOpcode() != ISD::OR)
3102  return SDValue();
3103  SDValue N000 = N00.getOperand(0);
3104  if (!isBSwapHWordElement(N000, Parts))
3105  return SDValue();
3106  SDValue N001 = N00.getOperand(1);
3107  if (!isBSwapHWordElement(N001, Parts))
3108  return SDValue();
3109  }
3110 
3111  // Make sure the parts are all coming from the same node.
3112  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
3113  return SDValue();
3114 
3115  SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
3116  SDValue(Parts[0],0));
3117 
3118  // Result of the bswap should be rotated by 16. If it's not legal, then
3119  // do (x << 16) | (x >> 16).
3120  SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
3121  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
3122  return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
3123  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
3124  return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
3125  return DAG.getNode(ISD::OR, SDLoc(N), VT,
3126  DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
3127  DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
3128 }
3129 
/// visitOR - Try the OR-specific folds and canonicalizations on node N, in the
/// order written below. Returns the replacement value for the first fold that
/// applies, SDValue(N, 0) from the final demanded-bits step, or an empty
/// SDValue when nothing applied.
///
/// NOTE(review): this listing omits several lines of the function: the
/// declarations of N0C and N1C (used below as nullable pointers to constant
/// nodes), the conditions guarding the four vector-edition returns, one
/// argument line of each isOperationLegal call, and the condition of the
/// demanded-bits step. Confirm those against the original file.
3130 SDValue DAGCombiner::visitOR(SDNode *N) {
3131  SDValue N0 = N->getOperand(0);
3132  SDValue N1 = N->getOperand(1);
3133  SDValue LL, LR, RL, RR, CC0, CC1;
3136  EVT VT = N1.getValueType();
3137 
3138  // fold vector ops
3139  if (VT.isVector()) {
3140  SDValue FoldedVOp = SimplifyVBinOp(N);
3141  if (FoldedVOp.getNode()) return FoldedVOp;
3142 
3143  // fold (or x, 0) -> x, vector edition
3145  return N1;
3147  return N0;
3148 
3149  // fold (or x, -1) -> -1, vector edition
3151  return N0;
3153  return N1;
3154  }
3155 
3156  // fold (or x, undef) -> -1
3157  if (!LegalOperations &&
3158  (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
3159  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
3160  return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
3161  }
3162  // fold (or c1, c2) -> c1|c2
3163  if (N0C && N1C)
3164  return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
3165  // canonicalize constant to RHS
3166  if (N0C && !N1C)
3167  return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
3168  // fold (or x, 0) -> x
3169  if (N1C && N1C->isNullValue())
3170  return N0;
3171  // fold (or x, -1) -> -1
3172  if (N1C && N1C->isAllOnesValue())
3173  return N1;
3174  // fold (or x, c) -> c iff (x & ~c) == 0
3175  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
3176  return N1;
3177 
3178  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
3179  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
3180  if (BSwap.getNode() != 0)
3181  return BSwap;
3182  BSwap = MatchBSwapHWordLow(N, N0, N1);
3183  if (BSwap.getNode() != 0)
3184  return BSwap;
3185 
3186  // reassociate or
3187  SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
3188  if (ROR.getNode() != 0)
3189  return ROR;
3190  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
3191  // iff (c1 & c2) != 0.
3192  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
3193  isa<ConstantSDNode>(N0.getOperand(1))) {
3194  ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
3195  if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
3196  return DAG.getNode(ISD::AND, SDLoc(N), VT,
3197  DAG.getNode(ISD::OR, SDLoc(N0), VT,
3198  N0.getOperand(0), N1),
3199  DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
3200  }
3201  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
3202  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3203  ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3204  ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3205 
3206  if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
3207  LL.getValueType().isInteger()) {
3208  // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
3209  // fold (or (setlt X, 0), (setlt Y, 0)) -> (setlt (or X, Y), 0)
3210  if (cast<ConstantSDNode>(LR)->isNullValue() &&
3211  (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
3212  SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
3213  LR.getValueType(), LL, RL);
3214  AddToWorkList(ORNode.getNode());
3215  return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
3216  }
3217  // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
3218  // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
3219  if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
3220  (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
3221  SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
3222  LR.getValueType(), LL, RL);
3223  AddToWorkList(ANDNode.getNode());
3224  return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
3225  }
3226  }
3227  // canonicalize equivalent to ll == rl
3228  if (LL == RR && LR == RL) {
3229  Op1 = ISD::getSetCCSwappedOperands(Op1);
3230  std::swap(RL, RR);
3231  }
  // Both comparisons now have the same operands: try to merge the two
  // condition codes into a single one.
3232  if (LL == RL && LR == RR) {
3233  bool isInteger = LL.getValueType().isInteger();
3234  ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
3235  if (Result != ISD::SETCC_INVALID &&
3236  (!LegalOperations ||
3237  (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3238  TLI.isOperationLegal(ISD::SETCC,
3240  return DAG.getSetCC(SDLoc(N), N0.getValueType(),
3241  LL, LR, Result);
3242  }
3243  }
3244 
3245  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
3246  if (N0.getOpcode() == N1.getOpcode()) {
3247  SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
3248  if (Tmp.getNode()) return Tmp;
3249  }
3250 
3251  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
3252  if (N0.getOpcode() == ISD::AND &&
3253  N1.getOpcode() == ISD::AND &&
3254  N0.getOperand(1).getOpcode() == ISD::Constant &&
3255  N1.getOperand(1).getOpcode() == ISD::Constant &&
3256  // Don't increase # computations.
3257  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3258  // We can only do this xform if we know that bits from X that are set in C2
3259  // but not in C1 are already zero. Likewise for Y.
3260  const APInt &LHSMask =
3261  cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
3262  const APInt &RHSMask =
3263  cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
3264 
3265  if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
3266  DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
3267  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3268  N0.getOperand(0), N1.getOperand(0));
3269  return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
3270  DAG.getConstant(LHSMask | RHSMask, VT));
3271  }
3272  }
3273 
3274  // See if this is some rotate idiom.
3275  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
3276  return SDValue(Rot, 0);
3277 
3278  // Simplify the operands using demanded-bits information.
3279  if (!VT.isVector() &&
3281  return SDValue(N, 0);
3282 
3283  return SDValue();
3284 }
3285 
3286 /// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
3287 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
3288  if (Op.getOpcode() == ISD::AND) {
3289  if (isa<ConstantSDNode>(Op.getOperand(1))) {
3290  Mask = Op.getOperand(1);
3291  Op = Op.getOperand(0);
3292  } else {
3293  return false;
3294  }
3295  }
3296 
3297  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
3298  Shift = Op;
3299  return true;
3300  }
3301 
3302  return false;
3303 }
3304 
3305 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
3306 // idioms for rotate, and if the target supports rotation instructions, generate
3307 // a rot[lr].
3308 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
3309  // Must be a legal type. Expanded 'n promoted things won't work with rotates.
3310  EVT VT = LHS.getValueType();
3311  if (!TLI.isTypeLegal(VT)) return 0;
3312 
3313  // The target must have at least one rotate flavor.
3314  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
3315  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
3316  if (!HasROTL && !HasROTR) return 0;
3317 
3318  // Match "(X shl/srl V1) & V2" where V2 may not be present.
3319  SDValue LHSShift; // The shift.
3320  SDValue LHSMask; // AND value if any.
3321  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
3322  return 0; // Not part of a rotate.
3323 
3324  SDValue RHSShift; // The shift.
3325  SDValue RHSMask; // AND value if any.
3326  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
3327  return 0; // Not part of a rotate.
3328 
3329  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
3330  return 0; // Not shifting the same value.
3331 
3332  if (LHSShift.getOpcode() == RHSShift.getOpcode())
3333  return 0; // Shifts must disagree.
3334 
3335  // Canonicalize shl to left side in a shl/srl pair.
3336  if (RHSShift.getOpcode() == ISD::SHL) {
3337  std::swap(LHS, RHS);
3338  std::swap(LHSShift, RHSShift);
3339  std::swap(LHSMask , RHSMask );
3340  }
3341 
3342  unsigned OpSizeInBits = VT.getSizeInBits();
3343  SDValue LHSShiftArg = LHSShift.getOperand(0);
3344  SDValue LHSShiftAmt = LHSShift.getOperand(1);
3345  SDValue RHSShiftArg = RHSShift.getOperand(0);
3346  SDValue RHSShiftAmt = RHSShift.getOperand(1);
3347 
3348  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
3349  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
3350  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
3351  RHSShiftAmt.getOpcode() == ISD::Constant) {
3352  uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
3353  uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
3354  if ((LShVal + RShVal) != OpSizeInBits)
3355  return 0;
3356 
3357  SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
3358  LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
3359 
3360  // If there is an AND of either shifted operand, apply it to the result.
3361  if (LHSMask.getNode() || RHSMask.getNode()) {
3362  APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
3363 
3364  if (LHSMask.getNode()) {
3365  APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
3366  Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
3367  }
3368  if (RHSMask.getNode()) {
3369  APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
3370  Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
3371  }
3372 
3373  Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
3374  }
3375 
3376  return Rot.getNode();
3377  }
3378 
3379  // If there is a mask here, and we have a variable shift, we can't be sure
3380  // that we're masking out the right stuff.
3381  if (LHSMask.getNode() || RHSMask.getNode())
3382  return 0;
3383 
3384  // If the shift amount is sign/zext/any-extended just peel it off.
3385  SDValue LExtOp0 = LHSShiftAmt;
3386  SDValue RExtOp0 = RHSShiftAmt;
3387  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
3388  LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
3389  LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
3390  LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
3391  (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
3392  RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
3393  RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
3394  RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
3395  LExtOp0 = LHSShiftAmt.getOperand(0);
3396  RExtOp0 = RHSShiftAmt.getOperand(0);
3397  }
3398 
3399  if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) {
3400  // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
3401  // (rotl x, y)
3402  // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
3403  // (rotr x, (sub 32, y))
3404  if (ConstantSDNode *SUBC =
3405  dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
3406  if (SUBC->getAPIntValue() == OpSizeInBits) {
3407  return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
3408  HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
3409  } else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
3410  LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
3411  // fold (or (shl (*ext x), (*ext y)),
3412  // (srl (*ext x), (*ext (sub 32, y)))) ->
3413  // (*ext (rotl x, y))
3414  // fold (or (shl (*ext x), (*ext y)),
3415  // (srl (*ext x), (*ext (sub 32, y)))) ->
3416  // (*ext (rotr x, (sub 32, y)))
3417  SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
3418  EVT LArgVT = LArgExtOp0.getValueType();
3419  bool HasROTRWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTR, LArgVT);
3420  bool HasROTLWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTL, LArgVT);
3421  if (HasROTRWithLArg || HasROTLWithLArg) {
3422  if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
3423  SDValue V =
3424  DAG.getNode(HasROTLWithLArg ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
3425  LArgExtOp0, HasROTL ? LHSShiftAmt : RHSShiftAmt);
3426  return DAG.getNode(LHSShiftArg.getOpcode(), DL, VT, V).getNode();
3427  }
3428  }
3429  }
3430  }
3431  } else if (LExtOp0.getOpcode() == ISD::SUB &&
3432  RExtOp0 == LExtOp0.getOperand(1)) {
3433  // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
3434  // (rotr x, y)
3435  // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
3436  // (rotl x, (sub 32, y))
3437  if (ConstantSDNode *SUBC =
3438  dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
3439  if (SUBC->getAPIntValue() == OpSizeInBits) {
3440  return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
3441  HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
3442  } else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
3443  RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
3444  // fold (or (shl (*ext x), (*ext (sub 32, y))),
3445  // (srl (*ext x), (*ext y))) ->
3446  // (*ext (rotl x, y))
3447  // fold (or (shl (*ext x), (*ext (sub 32, y))),
3448  // (srl (*ext x), (*ext y))) ->
3449  // (*ext (rotr x, (sub 32, y)))
3450  SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
3451  EVT RArgVT = RArgExtOp0.getValueType();
3452  bool HasROTRWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTR, RArgVT);
3453  bool HasROTLWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTL, RArgVT);
3454  if (HasROTRWithRArg || HasROTLWithRArg) {
3455  if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
3456  SDValue V =
3457  DAG.getNode(HasROTRWithRArg ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
3458  RArgExtOp0, HasROTR ? RHSShiftAmt : LHSShiftAmt);
3459  return DAG.getNode(RHSShiftArg.getOpcode(), DL, VT, V).getNode();
3460  }
3461  }
3462  }
3463  }
3464  }
3465 
3466  return 0;
3467 }
3468 
/// visitXOR - Combine an ISD::XOR node. The folds below are tried in order and
/// the first one that matches returns the replacement value; an empty
/// SDValue() at the end means no fold applied.
/// NOTE(review): this listing omits several source lines (the numbering skips
/// 3473-3474, 3483, 3485, 3576-3577 and 3599), so the definitions of N0C, N1C,
/// N00C and N01C and a few `if` conditions are not visible here. Presumably
/// the *C names are ConstantSDNode casts of the corresponding operands --
/// confirm against the full file.
3469 SDValue DAGCombiner::visitXOR(SDNode *N) {
3470  SDValue N0 = N->getOperand(0);
3471  SDValue N1 = N->getOperand(1);
3472  SDValue LHS, RHS, CC;
3475  EVT VT = N0.getValueType();
3476 
3477  // fold vector ops
3478  if (VT.isVector()) {
3479  SDValue FoldedVOp = SimplifyVBinOp(N);
3480  if (FoldedVOp.getNode()) return FoldedVOp;
3481 
3482  // fold (xor x, 0) -> x, vector edition
      // NOTE(review): the conditions guarding the two returns below are on
      // lines missing from this listing.
3484  return N1;
3486  return N0;
3487  }
3488 
3489  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
3490  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
3491  return DAG.getConstant(0, VT);
3492  // fold (xor x, undef) -> undef
3493  if (N0.getOpcode() == ISD::UNDEF)
3494  return N0;
3495  if (N1.getOpcode() == ISD::UNDEF)
3496  return N1;
3497  // fold (xor c1, c2) -> c1^c2
3498  if (N0C && N1C)
3499  return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
3500  // canonicalize constant to RHS
3501  if (N0C && !N1C)
3502  return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
3503  // fold (xor x, 0) -> x
3504  if (N1C && N1C->isNullValue())
3505  return N0;
3506  // reassociate xor
3507  SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
3508  if (RXOR.getNode() != 0)
3509  return RXOR;
3510 
3511  // fold !(x cc y) -> (x !cc y)
3512  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
3513  bool isInt = LHS.getValueType().isInteger();
3514  ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
3515  isInt);
3516 
      // Once operations have been legalized, only emit the inverted condition
      // code if the target reports it legal for the compared type.
3517  if (!LegalOperations ||
3518  TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
3519  switch (N0.getOpcode()) {
3520  default:
3521  llvm_unreachable("Unhandled SetCC Equivalent!");
3522  case ISD::SETCC:
3523  return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
3524  case ISD::SELECT_CC:
3525  return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
3526  N0.getOperand(3), NotCC);
3527  }
3528  }
3529  }
3530 
3531  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
3532  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
3533  N0.getNode()->hasOneUse() &&
3534  isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
3535  SDValue V = N0.getOperand(0);
3536  V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
3537  DAG.getConstant(1, V.getValueType()));
3538  AddToWorkList(V.getNode());
3539  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
3540  }
3541 
3542  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
      // The AND form is handled symmetrically: (not (and x, y)) becomes
      // (or (not x), (not y)); see NewOpcode below.
3543  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
3544  (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
3545  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
3546  if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
3547  unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
3548  LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
3549  RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
3550  AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
3551  return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
3552  }
3553  }
3554  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
      // As above, the AND form is rewritten to an OR of the negated operands.
3555  if (N1C && N1C->isAllOnesValue() &&
3556  (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
3557  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
3558  if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
3559  unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
3560  LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
3561  RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
3562  AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
3563  return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
3564  }
3565  }
3566  // fold (xor (and x, y), y) -> (and (not x), y)
3567  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
3568  N0->getOperand(1) == N1) {
3569  SDValue X = N0->getOperand(0);
3570  SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
3571  AddToWorkList(NotX.getNode());
3572  return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
3573  }
3574  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
3575  if (N1C && N0.getOpcode() == ISD::XOR) {
      // NOTE(review): the definitions of N00C and N01C are on lines missing
      // from this listing.
3578  if (N00C)
3579  return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
3580  DAG.getConstant(N1C->getAPIntValue() ^
3581  N00C->getAPIntValue(), VT));
3582  if (N01C)
3583  return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
3584  DAG.getConstant(N1C->getAPIntValue() ^
3585  N01C->getAPIntValue(), VT));
3586  }
3587  // fold (xor x, x) -> 0
3588  if (N0 == N1)
3589  return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
3590 
3591  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
3592  if (N0.getOpcode() == N1.getOpcode()) {
3593  SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
3594  if (Tmp.getNode()) return Tmp;
3595  }
3596 
3597  // Simplify the expression using non-local knowledge.
      // NOTE(review): the second half of this condition is on a line missing
      // from this listing.
3598  if (!VT.isVector() &&
3600  return SDValue(N, 0);
3601 
3602  return SDValue();
3603 }
3604 
3605 /// visitShiftByConstant - Handle transforms common to the three shifts, when
3606 /// the shift amount is a constant.
/// Rewrites (shift (binop X, C1), C2) as (binop (shift X, C2), (shift C1, C2)).
/// The rewrite is only done when the shifted value has a single use, the binop
/// is OR, XOR or AND (or ADD when N is a SHL), C1 is a ConstantSDNode, and X is
/// itself a SHL/SRA/SRL by a constant. Returns an empty SDValue() otherwise.
/// Note that the Amt parameter is not referenced in the body; the shift amount
/// operand is taken from N->getOperand(1).
3607 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
3608  SDNode *LHS = N->getOperand(0).getNode();
3609  if (!LHS->hasOneUse()) return SDValue();
3610 
3611  // We want to pull some binops through shifts, so that we have (and (shift))
3612  // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
3613  // thing happens with address calculations, so it's important to canonicalize
3614  // it.
3615  bool HighBitSet = false; // Can we transform this if the high bit is set?
3616 
3617  switch (LHS->getOpcode()) {
3618  default: return SDValue();
3619  case ISD::OR:
3620  case ISD::XOR:
3621  HighBitSet = false; // We can only transform sra if the high bit is clear.
3622  break;
3623  case ISD::AND:
3624  HighBitSet = true; // We can only transform sra if the high bit is set.
3625  break;
3626  case ISD::ADD:
3627  if (N->getOpcode() != ISD::SHL)
3628  return SDValue(); // only shl(add) not sr[al](add).
3629  HighBitSet = false; // We can only transform sra if the high bit is clear.
3630  break;
3631  }
3632 
3633  // We require the RHS of the binop to be a constant as well.
3634  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
3635  if (!BinOpCst) return SDValue();
3636 
3637  // FIXME: disable this unless the input to the binop is a shift by a constant.
3638  // If it is not a shift, it pessimizes some common cases like:
3639  //
3640  // void foo(int *X, int i) { X[i & 1235] = 1; }
3641  // int bar(int *X, int i) { return X[i & 255]; }
3642  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
3643  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
3644  BinOpLHSVal->getOpcode() != ISD::SRA &&
3645  BinOpLHSVal->getOpcode() != ISD::SRL) ||
3646  !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
3647  return SDValue();
3648 
3649  EVT VT = N->getValueType(0);
3650 
3651  // If this is a signed shift right, and the high bit is modified by the
3652  // logical operation, do not perform the transformation. The HighBitSet
3653  // boolean indicates the value of the high bit of the constant which would
3654  // cause it to be modified for this operation.
3655  if (N->getOpcode() == ISD::SRA) {
3656  bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
3657  if (BinOpRHSSignSet != HighBitSet)
3658  return SDValue();
3659  }
3660 
3661  // Fold the constants, shifting the binop RHS by the shift amount.
3662  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
3663  N->getValueType(0),
3664  LHS->getOperand(1), N->getOperand(1));
3665 
3666  // Create the new shift.
3667  SDValue NewShift = DAG.getNode(N->getOpcode(),
3668  SDLoc(LHS->getOperand(0)),
3669  VT, LHS->getOperand(0), N->getOperand(1));
3670 
3671  // Create the new binop.
3672  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
3673 }
3674 
/// visitSHL - Combine an ISD::SHL node. The folds below are tried in order and
/// the first one that matches returns the replacement value; an empty
/// SDValue() at the end means no fold applied.
/// NOTE(review): listing lines 3678-3679 are missing, so the definitions of
/// N0C and N1C are not visible here. Presumably they are ConstantSDNode casts
/// of N0 and N1 -- confirm against the full file.
3675 SDValue DAGCombiner::visitSHL(SDNode *N) {
3676  SDValue N0 = N->getOperand(0);
3677  SDValue N1 = N->getOperand(1);
3680  EVT VT = N0.getValueType();
3681  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
3682 
3683  // fold vector ops
3684  if (VT.isVector()) {
3685  SDValue FoldedVOp = SimplifyVBinOp(N);
3686  if (FoldedVOp.getNode()) return FoldedVOp;
3687  }
3688 
3689  // fold (shl c1, c2) -> c1<<c2
3690  if (N0C && N1C)
3691  return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
3692  // fold (shl 0, x) -> 0
3693  if (N0C && N0C->isNullValue())
3694  return N0;
3695  // fold (shl x, c >= size(x)) -> undef
3696  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
3697  return DAG.getUNDEF(VT);
3698  // fold (shl x, 0) -> x
3699  if (N1C && N1C->isNullValue())
3700  return N0;
3701  // fold (shl undef, x) -> 0
3702  if (N0.getOpcode() == ISD::UNDEF)
3703  return DAG.getConstant(0, VT);
3704  // if (shl x, c) is known to be zero, return 0
3705  if (DAG.MaskedValueIsZero(SDValue(N, 0),
3706  APInt::getAllOnesValue(OpSizeInBits)))
3707  return DAG.getConstant(0, VT);
3708  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
3709  if (N1.getOpcode() == ISD::TRUNCATE &&
3710  N1.getOperand(0).getOpcode() == ISD::AND &&
3711  N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
3712  SDValue N101 = N1.getOperand(0).getOperand(1);
3713  if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
3714  EVT TruncVT = N1.getValueType();
3715  SDValue N100 = N1.getOperand(0).getOperand(0);
3716  APInt TruncC = N101C->getAPIntValue();
3717  TruncC = TruncC.trunc(TruncVT.getSizeInBits());
3718  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
3719  DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
3720  DAG.getNode(ISD::TRUNCATE,
3721  SDLoc(N),
3722  TruncVT, N100),
3723  DAG.getConstant(TruncC, TruncVT)));
3724  }
3725  }
3726 
3727  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
3728  return SDValue(N, 0);
3729 
3730  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
3731  if (N1C && N0.getOpcode() == ISD::SHL &&
3732  N0.getOperand(1).getOpcode() == ISD::Constant) {
3733  uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
3734  uint64_t c2 = N1C->getZExtValue();
3735  if (c1 + c2 >= OpSizeInBits)
3736  return DAG.getConstant(0, VT);
3737  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
3738  DAG.getConstant(c1 + c2, N1.getValueType()));
3739  }
3740 
3741  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
3742  // For this to be valid, the second form must not preserve any of the bits
3743  // that are shifted out by the inner shift in the first form. This means
3744  // the outer shift size must be >= the number of bits added by the ext.
3745  // As a corollary, we don't care what kind of ext it is.
3746  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
3747  N0.getOpcode() == ISD::ANY_EXTEND ||
3748  N0.getOpcode() == ISD::SIGN_EXTEND) &&
3749  N0.getOperand(0).getOpcode() == ISD::SHL &&
3750  isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
3751  uint64_t c1 =
3752  cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
3753  uint64_t c2 = N1C->getZExtValue();
3754  EVT InnerShiftVT = N0.getOperand(0).getValueType();
3755  uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
      // OpSizeInBits - InnerShiftSize is the number of bits added by the ext.
3756  if (c2 >= OpSizeInBits - InnerShiftSize) {
3757  if (c1 + c2 >= OpSizeInBits)
3758  return DAG.getConstant(0, VT);
3759  return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
3760  DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
3761  N0.getOperand(0)->getOperand(0)),
3762  DAG.getConstant(c1 + c2, N1.getValueType()));
3763  }
3764  }
3765 
3766  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
3767  // Only fold this if the inner zext has no other uses to avoid increasing
3768  // the total number of instructions.
3769  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
3770  N0.getOperand(0).getOpcode() == ISD::SRL &&
3771  isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
3772  uint64_t c1 =
3773  cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
3774  if (c1 < VT.getSizeInBits()) {
3775  uint64_t c2 = N1C->getZExtValue();
3776  if (c1 == c2) {
3777  SDValue NewOp0 = N0.getOperand(0);
3778  EVT CountVT = NewOp0.getOperand(1).getValueType();
3779  SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
3780  NewOp0, DAG.getConstant(c2, CountVT));
3781  AddToWorkList(NewSHL.getNode());
3782  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
3783  }
3784  }
3785  }
3786 
3787  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
3788  // (and (srl x, (sub c1, c2)), MASK)
3789  // Only fold this if the inner shift has no other uses -- if it does, folding
3790  // this will increase the total number of instructions.
3791  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
3792  N0.getOperand(1).getOpcode() == ISD::Constant) {
3793  uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
3794  if (c1 < VT.getSizeInBits()) {
3795  uint64_t c2 = N1C->getZExtValue();
3796  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3797  VT.getSizeInBits() - c1);
3798  SDValue Shift;
3799  if (c2 > c1) {
3800  Mask = Mask.shl(c2-c1);
3801  Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
3802  DAG.getConstant(c2-c1, N1.getValueType()));
3803  } else {
3804  Mask = Mask.lshr(c1-c2);
3805  Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
3806  DAG.getConstant(c1-c2, N1.getValueType()));
3807  }
3808  return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
3809  DAG.getConstant(Mask, VT));
3810  }
3811  }
3812  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
3813  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
3814  SDValue HiBitsMask =
3815  DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
3816  VT.getSizeInBits() -
3817  N1C->getZExtValue()),
3818  VT);
3819  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
3820  HiBitsMask);
3821  }
3822 
      // Finally, try the transforms shared by all shifts by a constant.
3823  if (N1C) {
3824  SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
3825  if (NewSHL.getNode())
3826  return NewSHL;
3827  }
3828 
3829  return SDValue();
3830 }
3831 
/// visitSRA - Combine an ISD::SRA (arithmetic shift right) node. The folds
/// below are tried in order and the first one that matches returns the
/// replacement value; an empty SDValue() at the end means no fold applied.
/// NOTE(review): listing lines 3835-3836, 3911 and 3958 are missing, so the
/// definitions of N0C and N1C and the second argument of two DAG.getConstant
/// calls are not visible here -- confirm against the full file.
3832 SDValue DAGCombiner::visitSRA(SDNode *N) {
3833  SDValue N0 = N->getOperand(0);
3834  SDValue N1 = N->getOperand(1);
3837  EVT VT = N0.getValueType();
3838  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
3839 
3840  // fold vector ops
3841  if (VT.isVector()) {
3842  SDValue FoldedVOp = SimplifyVBinOp(N);
3843  if (FoldedVOp.getNode()) return FoldedVOp;
3844  }
3845 
3846  // fold (sra c1, c2) -> c1 >>s c2
3847  if (N0C && N1C)
3848  return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
3849  // fold (sra 0, x) -> 0
3850  if (N0C && N0C->isNullValue())
3851  return N0;
3852  // fold (sra -1, x) -> -1
3853  if (N0C && N0C->isAllOnesValue())
3854  return N0;
3855  // fold (sra x, c >= size(x)) -> undef
3856  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
3857  return DAG.getUNDEF(VT);
3858  // fold (sra x, 0) -> x
3859  if (N1C && N1C->isNullValue())
3860  return N0;
3861  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
3862  // sext_inreg.
3863  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
3864  unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
3865  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
3866  if (VT.isVector())
3867  ExtVT = EVT::getVectorVT(*DAG.getContext(),
3868  ExtVT, VT.getVectorNumElements());
3869  if ((!LegalOperations ||
3870  TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
3871  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
3872  N0.getOperand(0), DAG.getValueType(ExtVT));
3873  }
3874 
3875  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
3876  if (N1C && N0.getOpcode() == ISD::SRA) {
3877  if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3878  unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // Clamp the combined amount to the largest in-range shift.
3879  if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
3880  return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
3881  DAG.getConstant(Sum, N1C->getValueType(0)));
3882  }
3883  }
3884 
3885  // fold (sra (shl X, m), (sub result_size, n))
3886  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
3887  // result_size - n != m.
3888  // If truncate is free for the target sext(shl) is likely to result in better
3889  // code.
      // NOTE(review): the pattern above writes the residual shift as shl, but
      // the node built below is ISD::SRL -- confirm which is intended.
3890  if (N0.getOpcode() == ISD::SHL) {
3891  // Get the two constants of the shifts: N01C = m, N1C = result_size - n.
3892  const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3893  if (N01C && N1C) {
3894  // Determine what the truncate's result bitsize and type would be.
3895  EVT TruncVT =
3896  EVT::getIntegerVT(*DAG.getContext(),
3897  OpSizeInBits - N1C->getZExtValue());
3898  // Determine the residual right-shift amount.
3899  signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
3900 
3901  // If the shift is not a no-op (in which case this should be just a sign
3902  // extend already), the truncated to type is legal, sign_extend is legal
3903  // on that type, and the truncate to that type is both legal and free,
3904  // perform the transform.
3905  if ((ShiftAmt > 0) &&
3906  TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
3907  TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
3908  TLI.isTruncateFree(VT, TruncVT)) {
3909 
3910  SDValue Amt = DAG.getConstant(ShiftAmt,
3912  SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
3913  N0.getOperand(0), Amt);
3914  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
3915  Shift);
3916  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
3917  N->getValueType(0), Trunc);
3918  }
3919  }
3920  }
3921 
3922  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
3923  if (N1.getOpcode() == ISD::TRUNCATE &&
3924  N1.getOperand(0).getOpcode() == ISD::AND &&
3925  N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
3926  SDValue N101 = N1.getOperand(0).getOperand(1);
3927  if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
3928  EVT TruncVT = N1.getValueType();
3929  SDValue N100 = N1.getOperand(0).getOperand(0);
3930  APInt TruncC = N101C->getAPIntValue();
3931  TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
3932  return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
3933  DAG.getNode(ISD::AND, SDLoc(N),
3934  TruncVT,
3935  DAG.getNode(ISD::TRUNCATE,
3936  SDLoc(N),
3937  TruncVT, N100),
3938  DAG.getConstant(TruncC, TruncVT)));
3939  }
3940  }
3941 
3942  // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
3943  // if c1 is equal to the number of bits the trunc removes
3944  if (N0.getOpcode() == ISD::TRUNCATE &&
3945  (N0.getOperand(0).getOpcode() == ISD::SRL ||
3946  N0.getOperand(0).getOpcode() == ISD::SRA) &&
3947  N0.getOperand(0).hasOneUse() &&
3948  N0.getOperand(0).getOperand(1).hasOneUse() &&
3949  N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
3950  EVT LargeVT = N0.getOperand(0).getValueType();
3951  ConstantSDNode *LargeShiftAmt =
3952  cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
3953 
3954  if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
3955  LargeShiftAmt->getZExtValue()) {
3956  SDValue Amt =
3957  DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
3959  SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
3960  N0.getOperand(0).getOperand(0), Amt);
3961  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
3962  }
3963  }
3964 
3965  // Simplify, based on bits shifted out of the LHS.
3966  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
3967  return SDValue(N, 0);
3968 
3969 
3970  // If the sign bit is known to be zero, switch this to a SRL.
3971  if (DAG.SignBitIsZero(N0))
3972  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
3973 
      // Finally, try the transforms shared by all shifts by a constant.
3974  if (N1C) {
3975  SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
3976  if (NewSRA.getNode())
3977  return NewSRA;
3978  }
3979 
3980  return SDValue();
3981 }
3982 
/// visitSRL - Combine an ISD::SRL (logical shift right) node. The folds below
/// are tried in order and the first one that matches returns the replacement
/// value; an empty SDValue() at the end means no fold applied. Before giving
/// up, a BRCOND user of N (directly or through a single-use TRUNCATE) is added
/// to the worklist.
/// NOTE(review): listing lines 3986-3987 are missing, so the definitions of
/// N0C and N1C are not visible here. Presumably they are ConstantSDNode casts
/// of N0 and N1 -- confirm against the full file.
3983 SDValue DAGCombiner::visitSRL(SDNode *N) {
3984  SDValue N0 = N->getOperand(0);
3985  SDValue N1 = N->getOperand(1);
3988  EVT VT = N0.getValueType();
3989  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
3990 
3991  // fold vector ops
3992  if (VT.isVector()) {
3993  SDValue FoldedVOp = SimplifyVBinOp(N);
3994  if (FoldedVOp.getNode()) return FoldedVOp;
3995  }
3996 
3997  // fold (srl c1, c2) -> c1 >>u c2
3998  if (N0C && N1C)
3999  return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
4000  // fold (srl 0, x) -> 0
4001  if (N0C && N0C->isNullValue())
4002  return N0;
4003  // fold (srl x, c >= size(x)) -> undef
4004  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
4005  return DAG.getUNDEF(VT);
4006  // fold (srl x, 0) -> x
4007  if (N1C && N1C->isNullValue())
4008  return N0;
4009  // if (srl x, c) is known to be zero, return 0
4010  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4011  APInt::getAllOnesValue(OpSizeInBits)))
4012  return DAG.getConstant(0, VT);
4013 
4014  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
4015  if (N1C && N0.getOpcode() == ISD::SRL &&
4016  N0.getOperand(1).getOpcode() == ISD::Constant) {
4017  uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
4018  uint64_t c2 = N1C->getZExtValue();
4019  if (c1 + c2 >= OpSizeInBits)
4020  return DAG.getConstant(0, VT);
4021  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
4022  DAG.getConstant(c1 + c2, N1.getValueType()));
4023  }
4024 
4025  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
4026  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
4027  N0.getOperand(0).getOpcode() == ISD::SRL &&
4028  isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
4029  uint64_t c1 =
4030  cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
4031  uint64_t c2 = N1C->getZExtValue();
4032  EVT InnerShiftVT = N0.getOperand(0).getValueType();
4033  EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
4034  uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
4035  // This is only valid if the OpSizeInBits + c1 = size of inner shift.
4036  if (c1 + OpSizeInBits == InnerShiftSize) {
4037  if (c1 + c2 >= InnerShiftSize)
4038  return DAG.getConstant(0, VT);
4039  return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
4040  DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
4041  N0.getOperand(0)->getOperand(0),
4042  DAG.getConstant(c1 + c2, ShiftCountVT)));
4043  }
4044  }
4045 
4046  // fold (srl (shl x, c), c) -> (and x, cst2)
      // Restricted to values of at most 64 bits because the mask is built from
      // a 64-bit all-ones constant shifted right.
4047  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
4048  N0.getValueSizeInBits() <= 64) {
4049  uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
4050  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
4051  DAG.getConstant(~0ULL >> ShAmt, VT));
4052  }
4053 
4054  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
4055  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4056  // Shifting in all undef bits?
4057  EVT SmallVT = N0.getOperand(0).getValueType();
4058  if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
4059  return DAG.getUNDEF(VT);
4060 
4061  if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
4062  uint64_t ShiftAmt = N1C->getZExtValue();
4063  SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
4064  N0.getOperand(0),
4065  DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
4066  AddToWorkList(SmallShift.getNode());
4067  APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
4068  return DAG.getNode(ISD::AND, SDLoc(N), VT,
4069  DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
4070  DAG.getConstant(Mask, VT));
4071  }
4072  }
4073 
4074  // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
4075  // bit, which is unmodified by sra.
4076  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
4077  if (N0.getOpcode() == ISD::SRA)
4078  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
4079  }
4080 
4081  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
4082  if (N1C && N0.getOpcode() == ISD::CTLZ &&
4083  N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
4084  APInt KnownZero, KnownOne;
4085  DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
4086 
4087  // If any of the input bits are KnownOne, then the input couldn't be all
4088  // zeros, thus the result of the srl will always be zero.
4089  if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
4090 
4091  // If all of the bits input to the ctlz node are known to be zero, then
4092  // the result of the ctlz is "32" and the result of the shift is one.
4093  APInt UnknownBits = ~KnownZero;
4094  if (UnknownBits == 0) return DAG.getConstant(1, VT);
4095 
4096  // Otherwise, check to see if there is exactly one bit input to the ctlz.
4097  if ((UnknownBits & (UnknownBits - 1)) == 0) {
4098  // Okay, we know that only the single bit specified by UnknownBits
4099  // could be set on input to the CTLZ node. If this bit is set, the SRL
4100  // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
4101  // to an SRL/XOR pair, which is likely to simplify more.
4102  unsigned ShAmt = UnknownBits.countTrailingZeros();
4103  SDValue Op = N0.getOperand(0);
4104 
4105  if (ShAmt) {
4106  Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
4107  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
4108  AddToWorkList(Op.getNode());
4109  }
4110 
4111  return DAG.getNode(ISD::XOR, SDLoc(N), VT,
4112  Op, DAG.getConstant(1, VT));
4113  }
4114  }
4115 
4116  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
4117  if (N1.getOpcode() == ISD::TRUNCATE &&
4118  N1.getOperand(0).getOpcode() == ISD::AND &&
4119  N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
4120  SDValue N101 = N1.getOperand(0).getOperand(1);
4121  if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
4122  EVT TruncVT = N1.getValueType();
4123  SDValue N100 = N1.getOperand(0).getOperand(0);
4124  APInt TruncC = N101C->getAPIntValue();
4125  TruncC = TruncC.trunc(TruncVT.getSizeInBits());
4126  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
4127  DAG.getNode(ISD::AND, SDLoc(N),
4128  TruncVT,
4129  DAG.getNode(ISD::TRUNCATE,
4130  SDLoc(N),
4131  TruncVT, N100),
4132  DAG.getConstant(TruncC, TruncVT)));
4133  }
4134  }
4135 
4136  // fold operands of srl based on knowledge that the low bits are not
4137  // demanded.
4138  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4139  return SDValue(N, 0);
4140 
4141  if (N1C) {
4142  SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
4143  if (NewSRL.getNode())
4144  return NewSRL;
4145  }
4146 
4147  // Attempt to convert a srl of a load into a narrower zero-extending load.
4148  SDValue NarrowLoad = ReduceLoadWidth(N);
4149  if (NarrowLoad.getNode())
4150  return NarrowLoad;
4151 
4152  // Here is a common situation. We want to optimize:
4153  //
4154  // %a = ...
4155  // %b = and i32 %a, 2
4156  // %c = srl i32 %b, 1
4157  // brcond i32 %c ...
4158  //
4159  // into
4160  //
4161  // %a = ...
4162  // %b = and %a, 2
4163  // %c = setcc eq %b, 0
4164  // brcond %c ...
4165  //
4166  // However, after the source operand of SRL is optimized into AND, the SRL
4167  // itself may not be optimized further. Look for it and add the BRCOND into
4168  // the worklist.
4169  if (N->hasOneUse()) {
4170  SDNode *Use = *N->use_begin();
4171  if (Use->getOpcode() == ISD::BRCOND)
4172  AddToWorkList(Use);
4173  else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
4174  // Also look past the truncate.
4175  Use = *Use->use_begin();
4176  if (Use->getOpcode() == ISD::BRCOND)
4177  AddToWorkList(Use);
4178  }
4179  }
4180 
4181  return SDValue();
4182 }
4183 
/// visitCTLZ - Combine an ISD::CTLZ node. When the operand is a
/// ConstantSDNode, the same CTLZ node is requested again from the DAG
/// (presumably so that DAG.getNode can constant-fold it -- confirm);
/// otherwise an empty SDValue() reports that nothing changed.
4184 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
4185  SDValue N0 = N->getOperand(0);
4186  EVT VT = N->getValueType(0);
4187 
4188  // fold (ctlz c1) -> c2
4189  if (isa<ConstantSDNode>(N0))
4190  return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
4191  return SDValue();
4192 }
4193 
/// visitCTLZ_ZERO_UNDEF - Combine an ISD::CTLZ_ZERO_UNDEF node. When the
/// operand is a ConstantSDNode, the same node is requested again from the DAG
/// (presumably so that DAG.getNode can constant-fold it -- confirm);
/// otherwise an empty SDValue() reports that nothing changed.
4194 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
4195  SDValue N0 = N->getOperand(0);
4196  EVT VT = N->getValueType(0);
4197 
4198  // fold (ctlz_zero_undef c1) -> c2
4199  if (isa<ConstantSDNode>(N0))
4200  return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
4201  return SDValue();
4202 }
4203 
/// visitCTTZ - Combine an ISD::CTTZ node. When the operand is a
/// ConstantSDNode, the same CTTZ node is requested again from the DAG
/// (presumably so that DAG.getNode can constant-fold it -- confirm);
/// otherwise an empty SDValue() reports that nothing changed.
4204 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
4205  SDValue N0 = N->getOperand(0);
4206  EVT VT = N->getValueType(0);
4207 
4208  // fold (cttz c1) -> c2
4209  if (isa<ConstantSDNode>(N0))
4210  return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
4211  return SDValue();
4212 }
4213 
/// visitCTTZ_ZERO_UNDEF - Combine an ISD::CTTZ_ZERO_UNDEF node. When the
/// operand is a ConstantSDNode, the same node is requested again from the DAG
/// (presumably so that DAG.getNode can constant-fold it -- confirm);
/// otherwise an empty SDValue() reports that nothing changed.
4214 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
4215  SDValue N0 = N->getOperand(0);
4216  EVT VT = N->getValueType(0);
4217 
4218  // fold (cttz_zero_undef c1) -> c2
4219  if (isa<ConstantSDNode>(N0))
4220  return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
4221  return SDValue();
4222 }
4223 
/// visitCTPOP - Combine an ISD::CTPOP node. When the operand is a
/// ConstantSDNode, the same CTPOP node is requested again from the DAG
/// (presumably so that DAG.getNode can constant-fold it -- confirm);
/// otherwise an empty SDValue() reports that nothing changed.
4224 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
4225  SDValue N0 = N->getOperand(0);
4226  EVT VT = N->getValueType(0);
4227 
4228  // fold (ctpop c1) -> c2
4229  if (isa<ConstantSDNode>(N0))
4230  return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
4231  return SDValue();
4232 }
4233 
// visitSELECT - Combine step for a SELECT node whose operands are
// (condition N0, true value N1, false value N2). Tries a series of local
// folds (identical arms, constant condition, i1 selects rewritten as
// and/or/xor), then SimplifySelectOps, and finally, when the condition is a
// SETCC, either forms a SELECT_CC or defers to SimplifySelect. Returns the
// replacement value, SDValue(N, 0) when N was updated in place, or an empty
// SDValue when nothing applied.
4234 SDValue DAGCombiner::visitSELECT(SDNode *N) {
4235  SDValue N0 = N->getOperand(0);
4236  SDValue N1 = N->getOperand(1);
4237  SDValue N2 = N->getOperand(2);
      // NOTE(review): listing lines 4238-4240 are absent from this extract.
      // N0C, N1C and N2C are used below without a visible declaration, so they
      // are presumably declared there (constant views of N0/N1/N2) - confirm
      // against the upstream file.
4241  EVT VT = N->getValueType(0);
4242  EVT VT0 = N0.getValueType();
4243 
4244  // fold (select C, X, X) -> X
4245  if (N1 == N2)
4246  return N1;
4247  // fold (select true, X, Y) -> X
4248  if (N0C && !N0C->isNullValue())
4249  return N1;
4250  // fold (select false, X, Y) -> Y
4251  if (N0C && N0C->isNullValue())
4252  return N2;
4253  // fold (select C, 1, X) -> (or C, X)
4254  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
4255  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
4256  // fold (select C, 0, 1) -> (xor C, 1)
      // NOTE(review): listing line 4261 (the right-hand side of the
      // getBooleanContents comparison and the closing parentheses) is absent
      // from this extract.
4257  if (VT.isInteger() &&
4258  (VT0 == MVT::i1 ||
4259  (VT0.isInteger() &&
4260  TLI.getBooleanContents(false) ==
4262  N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
4263  SDValue XORNode;
      // Same type: the xor is the final result.
4264  if (VT == VT0)
4265  return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
4266  N0, DAG.getConstant(1, VT0));
      // Different types: xor in the condition type, then widen or narrow the
      // result to VT.
4267  XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
4268  N0, DAG.getConstant(1, VT0));
4269  AddToWorkList(XORNode.getNode());
4270  if (VT.bitsGT(VT0))
4271  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
4272  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
4273  }
4274  // fold (select C, 0, X) -> (and (not C), X)
4275  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
4276  SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
4277  AddToWorkList(NOTNode.getNode());
4278  return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
4279  }
4280  // fold (select C, X, 1) -> (or (not C), X)
4281  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
4282  SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
4283  AddToWorkList(NOTNode.getNode());
4284  return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
4285  }
4286  // fold (select C, X, 0) -> (and C, X)
4287  if (VT == MVT::i1 && N2C && N2C->isNullValue())
4288  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
4289  // fold (select X, X, Y) -> (or X, Y)
4290  // fold (select X, 1, Y) -> (or X, Y)
      // The "N1C == 1" disjunct repeats the condition already handled by the
      // (select C, 1, X) fold above, so only N0 == N1 can newly match here.
4291  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
4292  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
4293  // fold (select X, Y, X) -> (and X, Y)
4294  // fold (select X, Y, 0) -> (and X, Y)
      // Likewise the "N2C == 0" disjunct repeats the (select C, X, 0) fold
      // above, so only N0 == N2 can newly match here.
4295  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
4296  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
4297 
4298  // If we can fold this based on the true/false value, do so.
4299  if (SimplifySelectOps(N, N1, N2))
4300  return SDValue(N, 0); // Don't revisit N.
4301 
4302  // fold selects based on a setcc into other things, such as min/max/abs
4303  if (N0.getOpcode() == ISD::SETCC) {
4304  // FIXME:
4305  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
4306  // having to say they don't support SELECT_CC on every type the DAG knows
4307  // about, since there is no way to mark an opcode illegal at all value types
4308  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
4309  TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
4310  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
4311  N0.getOperand(0), N0.getOperand(1),
4312  N1, N2, N0.getOperand(2));
4313  return SimplifySelect(SDLoc(N), N0, N1, N2);
4314  }
4315 
4316  return SDValue();
4317 }
4318 
4319 static
4320 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
4321  SDLoc DL(N);
4322  EVT LoVT, HiVT;
4323  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
4324 
4325  // Split the inputs.
4326  SDValue Lo, Hi, LL, LH, RL, RH;
4327  llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
4328  llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
4329 
4330  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
4331  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
4332 
4333  return std::make_pair(Lo, Hi);
4334 }
4335 
// visitVSELECT - Combine step for a VSELECT node with operands
// (mask N0, true value N1, false value N2). Two transforms are attempted,
// both only when the mask is a SETCC: (1) recognise an integer-abs pattern
// and rewrite it as sra/add/xor, and (2) when the result type needs vector
// splitting, split the SETCC and the VSELECT into halves and concatenate
// them. Returns an empty SDValue when nothing applied.
4336 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
4337  SDValue N0 = N->getOperand(0);
4338  SDValue N1 = N->getOperand(1);
4339  SDValue N2 = N->getOperand(2);
4340  SDLoc DL(N);
4341 
4342  // Canonicalize integer abs.
4343  // vselect (setg[te] X, 0), X, -X ->
4344  // vselect (setgt X, -1), X, -X ->
4345  // vselect (setl[te] X, 0), -X, X ->
4346  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
4347  if (N0.getOpcode() == ISD::SETCC) {
4348  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4349  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4350  bool isAbs = false;
4351  bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
4352 
4353  if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
4354  (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
4355  N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      // NOTE(review): listing line 4356 (the body of the `if` above) is absent
      // from this extract; presumably it sets isAbs by checking that the SUB in
      // N2 subtracts from zero, mirroring line 4359 - confirm upstream.
4357  else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
4358  N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
4359  isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4360 
4361  if (isAbs) {
      // Shift = X >>s (element bits - 1); result = (X + Shift) ^ Shift.
4362  EVT VT = LHS.getValueType();
4363  SDValue Shift = DAG.getNode(
4364  ISD::SRA, DL, VT, LHS,
4365  DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
4366  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
4367  AddToWorkList(Shift.getNode());
4368  AddToWorkList(Add.getNode());
4369  return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
4370  }
4371  }
4372 
4373  // If the VSELECT result requires splitting and the mask is provided by a
4374  // SETCC, then split both nodes and its operands before legalization. This
4375  // prevents the type legalizer from unrolling SETCC into scalar comparisons
4376  // and enables future optimizations (e.g. min/max pattern matching on X86).
4377  if (N0.getOpcode() == ISD::SETCC) {
4378  EVT VT = N->getValueType(0);
4379 
4380  // Check if any splitting is required.
      // NOTE(review): listing line 4382 (the type action compared against) is
      // absent from this extract.
4381  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
4383  return SDValue();
4384 
4385  SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
4386  llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
4387  llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
4388  llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
4389 
4390  Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
4391  Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
4392 
4393  // Add the new VSELECT nodes to the work list in case they need to be split
4394  // again.
4395  AddToWorkList(Lo.getNode());
4396  AddToWorkList(Hi.getNode());
4397 
4398  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
4399  }
4400 
4401  return SDValue();
4402 }
4403 
// visitSELECT_CC - Combine step for a SELECT_CC node with operands
// (lhs N0, rhs N1, true value N2, false value N3, condition code N4).
// Folds identical arms, folds a condition that simplifies to a constant,
// rebuilds the node on a simplified SETCC, then tries SimplifySelectOps and
// finally SimplifySelectCC.
4404 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
4405  SDValue N0 = N->getOperand(0);
4406  SDValue N1 = N->getOperand(1);
4407  SDValue N2 = N->getOperand(2);
4408  SDValue N3 = N->getOperand(3);
4409  SDValue N4 = N->getOperand(4);
4410  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
4411 
4412  // fold select_cc lhs, rhs, x, x, cc -> x
4413  if (N2 == N3)
4414  return N2;
4415 
4416  // Determine if the condition we're dealing with is constant
      // NOTE(review): listing line 4417 is absent from this extract; it
      // presumably declares SCC and begins the call whose trailing arguments
      // appear on the next line - confirm upstream.
4418  N0, N1, CC, SDLoc(N), false);
4419  if (SCC.getNode()) {
4420  AddToWorkList(SCC.getNode());
4421 
4422  if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
4423  if (!SCCC->isNullValue())
4424  return N2; // cond always true -> true val
4425  else
4426  return N3; // cond always false -> false val
4427  }
4428 
4429  // Fold to a simpler select_cc
4430  if (SCC.getOpcode() == ISD::SETCC)
4431  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
4432  SCC.getOperand(0), SCC.getOperand(1), N2, N3,
4433  SCC.getOperand(2));
4434  }
4435 
4436  // If we can fold this based on the true/false value, do so.
4437  if (SimplifySelectOps(N, N2, N3))
4438  return SDValue(N, 0); // Don't revisit N.
4439 
4440  // fold select_cc into other things, such as min/max/abs
4441  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
4442 }
4443 
4444 SDValue DAGCombiner::visitSETCC(SDNode *N) {
4445  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
4446  cast<CondCodeSDNode>(N->getOperand(2))->get(),
4447  SDLoc(N));
4448 }
4449 
4450 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
4451 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
4452 // transformation. Returns true if the extensions are possible and the
4453 // above-mentioned transformation is profitable.
     // SETCC users that would need their operands extended are appended to
     // ExtendNodes.
     // NOTE(review): listing line 4454 (the start of the signature, with the
     // return type, function name and the leading parameters N and N0 used in
     // the body) is absent from this extract.
4455  unsigned ExtOpc,
4456  SmallVectorImpl<SDNode *> &ExtendNodes,
4457  const TargetLowering &TLI) {
4458  bool HasCopyToRegUses = false;
4459  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
      // Inspect every other user of the same result of N0.
4460  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
4461  UE = N0.getNode()->use_end();
4462  UI != UE; ++UI) {
4463  SDNode *User = *UI;
4464  if (User == N)
4465  continue;
4466  if (UI.getUse().getResNo() != N0.getResNo())
4467  continue;
4468  // FIXME: Only extend SETCC N, N and SETCC N, c for now.
4469  if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
4470  ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
4471  if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
4472  // Sign bits will be lost after a zext.
4473  return false;
      // Add is set when the SETCC has a constant operand other than N0; a
      // non-constant operand other than N0 rejects the transformation.
4474  bool Add = false;
4475  for (unsigned i = 0; i != 2; ++i) {
4476  SDValue UseOp = User->getOperand(i);
4477  if (UseOp == N0)
4478  continue;
4479  if (!isa<ConstantSDNode>(UseOp))
4480  return false;
4481  Add = true;
4482  }
4483  if (Add)
4484  ExtendNodes.push_back(User);
4485  continue;
4486  }
4487  // If truncates aren't free and there are users we can't
4488  // extend, it isn't worthwhile.
4489  if (!isTruncFree)
4490  return false;
4491  // Remember if this value is live-out.
4492  if (User->getOpcode() == ISD::CopyToReg)
4493  HasCopyToRegUses = true;
4494  }
4495 
4496  if (HasCopyToRegUses) {
      // Check whether result 0 of the extend node N also feeds a CopyToReg.
4497  bool BothLiveOut = false;
4498  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
4499  UI != UE; ++UI) {
4500  SDUse &Use = UI.getUse();
4501  if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
4502  BothLiveOut = true;
4503  break;
4504  }
4505  }
4506  if (BothLiveOut)
4507  // Both unextended and extended values are live out. There had better be
4508  // a good reason for the transformation.
      // The size is implicitly converted to bool: true only when at least one
      // SETCC user was collected.
4509  return ExtendNodes.size();
4510  }
4511  return true;
4512 }
4513 
// ExtendSetCCUses - For each SETCC in SetCCs, build a replacement SETCC
// whose first two operands are widened: an operand equal to Trunc is
// replaced by ExtLoad, any other operand is wrapped in an ExtType node of
// ExtLoad's value type. The condition code operand is kept, and the old
// SETCC is replaced via CombineTo.
4514 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
4515  SDValue Trunc, SDValue ExtLoad, SDLoc DL,
4516  ISD::NodeType ExtType) {
4517  // Extend SetCC uses if necessary.
4518  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
4519  SDNode *SetCC = SetCCs[i];
      // NOTE(review): listing line 4520 is absent from this extract; it
      // presumably declares the operand vector Ops used below.
4521 
4522  for (unsigned j = 0; j != 2; ++j) {
4523  SDValue SOp = SetCC->getOperand(j);
4524  if (SOp == Trunc)
4525  Ops.push_back(ExtLoad);
4526  else
4527  Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
4528  }
4529 
4530  Ops.push_back(SetCC->getOperand(2));
4531  CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
4532  &Ops[0], Ops.size()));
4533  }
4534 }
4535 
// visitSIGN_EXTEND - Combine step for a SIGN_EXTEND node. In order, it
// tries: constant operand, nested sext/aext, sext of a truncate (narrowed
// load, redundant trunc/sext pair, sign_extend_inreg), sext of a plain load
// or of an sext/ext load (forming a SEXTLOAD), sext of and/or/xor of a load
// with a constant, sext of a SETCC, and finally sext -> zext when the sign
// bit is known zero. Returns an empty SDValue when nothing applied.
4536 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
4537  SDValue N0 = N->getOperand(0);
4538  EVT VT = N->getValueType(0);
4539 
4540  // fold (sext c1) -> c1
4541  if (isa<ConstantSDNode>(N0))
4542  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);
4543 
4544  // fold (sext (sext x)) -> (sext x)
4545  // fold (sext (aext x)) -> (sext x)
4546  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
4547  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
4548  N0.getOperand(0));
4549 
4550  if (N0.getOpcode() == ISD::TRUNCATE) {
4551  // fold (sext (truncate (load x))) -> (sext (smaller load x))
4552  // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
4553  SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
4554  if (NarrowLoad.getNode()) {
      // Capture the truncate's operand before CombineTo may delete the
      // truncate.
4555  SDNode* oye = N0.getNode()->getOperand(0).getNode();
4556  if (NarrowLoad.getNode() != N0.getNode()) {
4557  CombineTo(N0.getNode(), NarrowLoad);
4558  // CombineTo deleted the truncate, if needed, but not what's under it.
4559  AddToWorkList(oye);
4560  }
4561  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4562  }
4563 
4564  // See if the value being truncated is already sign extended. If so, just
4565  // eliminate the trunc/sext pair.
4566  SDValue Op = N0.getOperand(0);
4567  unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
4568  unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
4569  unsigned DestBits = VT.getScalarType().getSizeInBits();
4570  unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
4571 
4572  if (OpBits == DestBits) {
4573  // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
4574  // bits, it is already ready.
4575  if (NumSignBits > DestBits-MidBits)
4576  return Op;
4577  } else if (OpBits < DestBits) {
4578  // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
4579  // bits, just sext from i32.
4580  if (NumSignBits > OpBits-MidBits)
4581  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
4582  } else {
4583  // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
4584  // bits, just truncate to i32.
4585  if (NumSignBits > OpBits-MidBits)
4586  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
4587  }
4588 
4589  // fold (sext (truncate x)) -> (sextinreg x).
4590  if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
4591  N0.getValueType())) {
      // Bring Op to the destination type first; the sign_extend_inreg then
      // extends from the truncated type.
4592  if (OpBits < DestBits)
4593  Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
4594  else if (OpBits > DestBits)
4595  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
4596  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
4597  DAG.getValueType(N0.getValueType()));
4598  }
4599  }
4600 
4601  // fold (sext (load x)) -> (sext (truncate (sextload x)))
4602  // None of the supported targets knows how to perform load and sign extend
4603  // on vectors in one instruction. We only perform this transformation on
4604  // scalars.
4605  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
4606  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
4607  TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
4608  bool DoXform = true;
4609  SmallVector<SDNode*, 4> SetCCs;
      // With other users of the load, ask whether those uses can be extended.
4610  if (!N0.hasOneUse())
4611  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
4612  if (DoXform) {
4613  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4614  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
4615  LN0->getChain(),
4616  LN0->getBasePtr(), N0.getValueType(),
4617  LN0->getMemOperand());
4618  CombineTo(N, ExtLoad);
4619  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
4620  N0.getValueType(), ExtLoad);
      // Replace the old load's value with the truncate and its chain with the
      // new load's chain.
4621  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      // NOTE(review): listing line 4623 (the final argument and close of this
      // call) is absent from this extract.
4622  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
4624  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4625  }
4626  }
4627 
4628  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
4629  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
4630  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
4631  ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
4632  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4633  EVT MemVT = LN0->getMemoryVT();
4634  if ((!LegalOperations && !LN0->isVolatile()) ||
4635  TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
4636  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
4637  LN0->getChain(),
4638  LN0->getBasePtr(), MemVT,
4639  LN0->getMemOperand());
4640  CombineTo(N, ExtLoad);
4641  CombineTo(N0.getNode(),
4642  DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
4643  N0.getValueType(), ExtLoad),
4644  ExtLoad.getValue(1));
4645  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4646  }
4647  }
4648 
4649  // fold (sext (and/or/xor (load x), cst)) ->
4650  // (and/or/xor (sextload x), (sext cst))
      // As written, the last clause requires !LegalOperations AND that the
      // logic op is legal at VT.
4651  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
4652  N0.getOpcode() == ISD::XOR) &&
4653  isa<LoadSDNode>(N0.getOperand(0)) &&
4654  N0.getOperand(1).getOpcode() == ISD::Constant &&
4655  TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
4656  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
4657  LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
      // A zero-extending load is excluded from this fold.
4658  if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
4659  bool DoXform = true;
4660  SmallVector<SDNode*, 4> SetCCs;
      // NOTE(review): listing line 4662 (presumably the start of the
      // DoXform = ExtendUsesToFormExtLoad(...) call) is absent from this
      // extract.
4661  if (!N0.hasOneUse())
4663  SetCCs, TLI);
4664  if (DoXform) {
4665  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
4666  LN0->getChain(), LN0->getBasePtr(),
4667  LN0->getMemoryVT(),
4668  LN0->getMemOperand());
      // Sign-extend the constant operand to the destination width.
4669  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
4670  Mask = Mask.sext(VT.getSizeInBits());
4671  SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
4672  ExtLoad, DAG.getConstant(Mask, VT));
4673  SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
4674  SDLoc(N0.getOperand(0)),
4675  N0.getOperand(0).getValueType(), ExtLoad);
4676  CombineTo(N, And);
4677  CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
      // NOTE(review): listing line 4679 (the final argument and close of this
      // call) is absent from this extract.
4678  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
4680  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4681  }
4682  }
4683  }
4684 
4685  if (N0.getOpcode() == ISD::SETCC) {
4686  // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
4687  // Only do this before legalize for now.
      // NOTE(review): listing line 4690 (the boolean-contents value compared
      // against, plus the closing `) {`) is absent from this extract.
4688  if (VT.isVector() && !LegalOperations &&
4689  TLI.getBooleanContents(true) ==
4691  EVT N0VT = N0.getOperand(0).getValueType();
4692  // On some architectures (such as SSE/NEON/etc) the SETCC result type is
4693  // of the same size as the compared operands. Only optimize sext(setcc())
4694  // if this is the case.
4695  EVT SVT = getSetCCResultType(N0VT);
4696 
4697  // We know that the # elements of the results is the same as the
4698  // # elements of the compare (and the # elements of the compare result
4699  // for that matter). Check to see that they are the same size. If so,
4700  // we know that the element size of the sext'd result matches the
4701  // element size of the compare operands.
4702  if (VT.getSizeInBits() == SVT.getSizeInBits())
4703  return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
4704  N0.getOperand(1),
4705  cast<CondCodeSDNode>(N0.getOperand(2))->get());
4706 
4707  // If the desired elements are smaller or larger than the source
4708  // elements we can use a matching integer vector type and then
4709  // truncate/sign extend
4710  EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
4711  if (SVT == MatchingVectorType) {
4712  SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
4713  N0.getOperand(0), N0.getOperand(1),
4714  cast<CondCodeSDNode>(N0.getOperand(2))->get());
4715  return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
4716  }
4717  }
4718 
4719  // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
4720  unsigned ElementWidth = VT.getScalarType().getSizeInBits();
4721  SDValue NegOne =
4722  DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
4723  SDValue SCC =
4724  SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
4725  NegOne, DAG.getConstant(0, VT),
4726  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
4727  if (SCC.getNode()) return SCC;
      // Scalar fallback: select between -1 and 0 on a freshly built setcc.
4728  if (!VT.isVector() &&
4729  (!LegalOperations ||
4730  TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
4731  return DAG.getSelect(SDLoc(N), VT,
4732  DAG.getSetCC(SDLoc(N),
4733  getSetCCResultType(VT),
4734  N0.getOperand(0), N0.getOperand(1),
4735  cast<CondCodeSDNode>(N0.getOperand(2))->get()),
4736  NegOne, DAG.getConstant(0, VT));
4737  }
4738  }
4739 
4740  // fold (sext x) -> (zext x) if the sign bit is known zero.
4741  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
4742  DAG.SignBitIsZero(N0))
4743  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
4744 
4745  return SDValue();
4746 }
4747 
4748 // isTruncateOf - If N is a truncate of some other value, return true, record
4749 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
4750 // This function computes KnownZero to avoid a duplicated call to
4751 // ComputeMaskedBits in the caller.
4752 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
4753  APInt &KnownZero) {
4754  APInt KnownOne;
4755  if (N->getOpcode() == ISD::TRUNCATE) {
4756  Op = N->getOperand(0);
4757  DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
4758  return true;
4759  }
4760 
4761  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
4762  cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
4763  return false;
4764 
4765  SDValue Op0 = N->getOperand(0);
4766  SDValue Op1 = N->getOperand(1);
4767  assert(Op0.getValueType() == Op1.getValueType());
4768 
4769  ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
4770  ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
4771  if (COp0 && COp0->isNullValue())
4772  Op = Op1;
4773  else if (COp1 && COp1->isNullValue())
4774  Op = Op0;
4775  else
4776  return false;
4777 
4778  DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
4779 
4780  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
4781  return false;
4782 
4783  return true;
4784 }
4785 
// visitZERO_EXTEND - Combine step for a ZERO_EXTEND node. In order, it
// tries: constant operand, nested zext/aext, zext of a truncate-like value
// whose dropped bits are known zero, zext of a truncate (narrowed load, or
// an in-register zero extend), zext of (and (trunc x), cst), zext of a plain
// load or of and/or/xor of a load with a constant (forming a ZEXTLOAD), zext
// of a zext/ext load, zext of a SETCC, and zext of a constant shift of a
// zext. Returns an empty SDValue when nothing applied.
4786 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
4787  SDValue N0 = N->getOperand(0);
4788  EVT VT = N->getValueType(0);
4789 
4790  // fold (zext c1) -> c1
4791  if (isa<ConstantSDNode>(N0))
4792  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
4793  // fold (zext (zext x)) -> (zext x)
4794  // fold (zext (aext x)) -> (zext x)
4795  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
4796  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
4797  N0.getOperand(0));
4798 
4799  // fold (zext (truncate x)) -> (zext x) or
4800  // (zext (truncate x)) -> (truncate x)
4801  // This is valid when the truncated bits of x are already zero.
4802  // FIXME: We should extend this to work for vectors too.
4803  SDValue Op;
4804  APInt KnownZero;
4805  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
      // NOTE(review): listing line 4809 (the start of the expression for the
      // unequal-width case, whose arguments follow) is absent from this
      // extract.
4806  APInt TruncatedBits =
4807  (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
4808  APInt(Op.getValueSizeInBits(), 0) :
4810  N0.getValueSizeInBits(),
4811  std::min(Op.getValueSizeInBits(),
4812  VT.getSizeInBits()));
      // Proceed only when every bit in TruncatedBits is known zero in Op.
4813  if (TruncatedBits == (KnownZero & TruncatedBits)) {
4814  if (VT.bitsGT(Op.getValueType()))
4815  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
4816  if (VT.bitsLT(Op.getValueType()))
4817  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
4818 
4819  return Op;
4820  }
4821  }
4822 
4823  // fold (zext (truncate (load x))) -> (zext (smaller load x))
4824  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
4825  if (N0.getOpcode() == ISD::TRUNCATE) {
4826  SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
4827  if (NarrowLoad.getNode()) {
      // Capture the truncate's operand before CombineTo may delete the
      // truncate.
4828  SDNode* oye = N0.getNode()->getOperand(0).getNode();
4829  if (NarrowLoad.getNode() != N0.getNode()) {
4830  CombineTo(N0.getNode(), NarrowLoad);
4831  // CombineTo deleted the truncate, if needed, but not what's under it.
4832  AddToWorkList(oye);
4833  }
4834  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4835  }
4836  }
4837 
4838  // fold (zext (truncate x)) -> (and x, mask)
4839  if (N0.getOpcode() == ISD::TRUNCATE &&
4840  (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
4841 
4842  // fold (zext (truncate (load x))) -> (zext (smaller load x))
4843  // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
      // NOTE(review): this repeats the ReduceLoadWidth attempt made just
      // above on the same node; it looks redundant once the first attempt
      // has failed - confirm before removing.
4844  SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
4845  if (NarrowLoad.getNode()) {
4846  SDNode* oye = N0.getNode()->getOperand(0).getNode();
4847  if (NarrowLoad.getNode() != N0.getNode()) {
4848  CombineTo(N0.getNode(), NarrowLoad);
4849  // CombineTo deleted the truncate, if needed, but not what's under it.
4850  AddToWorkList(oye);
4851  }
4852  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4853  }
4854 
      // Bring the truncate's input to VT, then zero-extend in register from
      // the truncated scalar type.
4855  SDValue Op = N0.getOperand(0);
4856  if (Op.getValueType().bitsLT(VT)) {
4857  Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
4858  AddToWorkList(Op.getNode());
4859  } else if (Op.getValueType().bitsGT(VT)) {
4860  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
4861  AddToWorkList(Op.getNode());
4862  }
4863  return DAG.getZeroExtendInReg(Op, SDLoc(N),
4864  N0.getValueType().getScalarType());
4865  }
4866 
4867  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
4868  // if either of the casts is not free.
4869  if (N0.getOpcode() == ISD::AND &&
4870  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
4871  N0.getOperand(1).getOpcode() == ISD::Constant &&
4872  (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
4873  N0.getValueType()) ||
4874  !TLI.isZExtFree(N0.getValueType(), VT))) {
4875  SDValue X = N0.getOperand(0).getOperand(0);
4876  if (X.getValueType().bitsLT(VT)) {
4877  X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
4878  } else if (X.getValueType().bitsGT(VT)) {
4879  X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
4880  }
      // Zero-extend the mask constant to the destination width.
4881  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
4882  Mask = Mask.zext(VT.getSizeInBits());
4883  return DAG.getNode(ISD::AND, SDLoc(N), VT,
4884  X, DAG.getConstant(Mask, VT));
4885  }
4886 
4887  // fold (zext (load x)) -> (zext (truncate (zextload x)))
4888  // None of the supported targets knows how to perform load and vector_zext
4889  // on vectors in one instruction. We only perform this transformation on
4890  // scalars.
4891  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
4892  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
4893  TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
4894  bool DoXform = true;
4895  SmallVector<SDNode*, 4> SetCCs;
      // With other users of the load, ask whether those uses can be extended.
4896  if (!N0.hasOneUse())
4897  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
4898  if (DoXform) {
4899  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4900  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
4901  LN0->getChain(),
4902  LN0->getBasePtr(), N0.getValueType(),
4903  LN0->getMemOperand());
4904  CombineTo(N, ExtLoad);
4905  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
4906  N0.getValueType(), ExtLoad);
4907  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
4908 
      // NOTE(review): listing line 4910 (the final argument and close of this
      // call) is absent from this extract.
4909  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
4911  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4912  }
4913  }
4914 
4915  // fold (zext (and/or/xor (load x), cst)) ->
4916  // (and/or/xor (zextload x), (zext cst))
      // As written, the last clause requires !LegalOperations AND that the
      // logic op is legal at VT.
4917  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
4918  N0.getOpcode() == ISD::XOR) &&
4919  isa<LoadSDNode>(N0.getOperand(0)) &&
4920  N0.getOperand(1).getOpcode() == ISD::Constant &&
4921  TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
4922  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
4923  LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
      // A sign-extending load is excluded from this fold.
4924  if (LN0->getExtensionType() != ISD::SEXTLOAD) {
4925  bool DoXform = true;
4926  SmallVector<SDNode*, 4> SetCCs;
      // NOTE(review): listing line 4928 (presumably the start of the
      // DoXform = ExtendUsesToFormExtLoad(...) call) is absent from this
      // extract.
4927  if (!N0.hasOneUse())
4929  SetCCs, TLI);
4930  if (DoXform) {
4931  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
4932  LN0->getChain(), LN0->getBasePtr(),
4933  LN0->getMemoryVT(),
4934  LN0->getMemOperand());
4935  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
4936  Mask = Mask.zext(VT.getSizeInBits());
4937  SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
4938  ExtLoad, DAG.getConstant(Mask, VT));
4939  SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
4940  SDLoc(N0.getOperand(0)),
4941  N0.getOperand(0).getValueType(), ExtLoad);
4942  CombineTo(N, And);
4943  CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
      // NOTE(review): listing line 4945 (the final argument and close of this
      // call) is absent from this extract.
4944  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
4946  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4947  }
4948  }
4949  }
4950 
4951  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
4952  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
4953  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
4954  ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
4955  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4956  EVT MemVT = LN0->getMemoryVT();
4957  if ((!LegalOperations && !LN0->isVolatile()) ||
4958  TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
4959  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
4960  LN0->getChain(),
4961  LN0->getBasePtr(), MemVT,
4962  LN0->getMemOperand());
4963  CombineTo(N, ExtLoad);
4964  CombineTo(N0.getNode(),
4965  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
4966  ExtLoad),
4967  ExtLoad.getValue(1));
4968  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4969  }
4970  }
4971 
4972  if (N0.getOpcode() == ISD::SETCC) {
4973  if (!LegalOperations && VT.isVector()) {
4974  // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
4975  // Only do this before legalize for now.
4976  EVT N0VT = N0.getOperand(0).getValueType();
4977  EVT EltVT = VT.getVectorElementType();
      // NOTE(review): listing line 4978 is absent from this extract; it
      // presumably declares OneOps (the vector of constant-1 elements used
      // by the BUILD_VECTOR nodes below), with the next line as its tail.
4979  DAG.getConstant(1, EltVT));
4980  if (VT.getSizeInBits() == N0VT.getSizeInBits())
4981  // We know that the # elements of the results is the same as the
4982  // # elements of the compare (and the # elements of the compare result
4983  // for that matter). Check to see that they are the same size. If so,
4984  // we know that the element size of the sext'd result matches the
4985  // element size of the compare operands.
4986  return DAG.getNode(ISD::AND, SDLoc(N), VT,
4987  DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
4988  N0.getOperand(1),
4989  cast<CondCodeSDNode>(N0.getOperand(2))->get()),
4990  DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
4991  &OneOps[0], OneOps.size()));
4992 
4993  // If the desired elements are smaller or larger than the source
4994  // elements we can use a matching integer vector type and then
4995  // truncate/sign extend
4996  EVT MatchingElementType =
4997  EVT::getIntegerVT(*DAG.getContext(),
4998  N0VT.getScalarType().getSizeInBits());
4999  EVT MatchingVectorType =
5000  EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
5001  N0VT.getVectorNumElements());
5002  SDValue VsetCC =
5003  DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
5004  N0.getOperand(1),
5005  cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // The setcc result is sign-extended or truncated to VT and then masked
      // with the all-ones-element vector.
5006  return DAG.getNode(ISD::AND, SDLoc(N), VT,
5007  DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
5008  DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
5009  &OneOps[0], OneOps.size()));
5010  }
5011 
5012  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
5013  SDValue SCC =
5014  SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
5015  DAG.getConstant(1, VT), DAG.getConstant(0, VT),
5016  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
5017  if (SCC.getNode()) return SCC;
5018  }
5019 
5020  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
      // The condition below also accepts SRL, not only SHL.
5021  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
5022  isa<ConstantSDNode>(N0.getOperand(1)) &&
5023  N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
5024  N0.hasOneUse()) {
5025  SDValue ShAmt = N0.getOperand(1);
5026  unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
5027  if (N0.getOpcode() == ISD::SHL) {
5028  SDValue InnerZExt = N0.getOperand(0);
5029  // If the original shl may be shifting out bits, do not perform this
5030  // transformation.
      // KnownZeroBits is the number of high bits added by the inner zext.
5031  unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
5032  InnerZExt.getOperand(0).getValueType().getSizeInBits();
5033  if (ShAmtVal > KnownZeroBits)
5034  return SDValue();
5035  }
5036 
5037  SDLoc DL(N);
5038 
5039  // Ensure that the shift amount is wide enough for the shifted value.
5040  if (VT.getSizeInBits() >= 256)
5041  ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
5042 
5043  return DAG.getNode(N0.getOpcode(), DL, VT,
5044  DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
5045  ShAmt);
5046  }
5047 
5048  return SDValue();
5049 }
5050 
/// visitANY_EXTEND - Try to simplify an ANY_EXTEND node. Returns the
/// replacement value, SDValue(N, 0) when N has already been replaced in place
/// through CombineTo, or a null SDValue when none of the folds below applies.
5051 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
5052  SDValue N0 = N->getOperand(0);
5053  EVT VT = N->getValueType(0);
5054 
5055  // fold (aext c1) -> c1
5056  if (isa<ConstantSDNode>(N0))
5057  return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0);
5058  // fold (aext (aext x)) -> (aext x)
5059  // fold (aext (zext x)) -> (zext x)
5060  // fold (aext (sext x)) -> (sext x)
5061  if (N0.getOpcode() == ISD::ANY_EXTEND ||
5062  N0.getOpcode() == ISD::ZERO_EXTEND ||
5063  N0.getOpcode() == ISD::SIGN_EXTEND)
5064  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
5065 
5066  // fold (aext (truncate (load x))) -> (aext (smaller load x))
5067  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
5068  if (N0.getOpcode() == ISD::TRUNCATE) {
5069  SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
5070  if (NarrowLoad.getNode()) {
      // Remember the truncate's input before the truncate is replaced, so it
      // can be put back on the worklist afterwards.
5071  SDNode* oye = N0.getNode()->getOperand(0).getNode();
5072  if (NarrowLoad.getNode() != N0.getNode()) {
5073  CombineTo(N0.getNode(), NarrowLoad);
5074  // CombineTo deleted the truncate, if needed, but not what's under it.
5075  AddToWorkList(oye);
5076  }
5077  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5078  }
5079  }
5080 
5081  // fold (aext (truncate x)) -> x, (truncate x) or (aext x), depending on how
      // the type of x compares with VT.
5082  if (N0.getOpcode() == ISD::TRUNCATE) {
5083  SDValue TruncOp = N0.getOperand(0);
5084  if (TruncOp.getValueType() == VT)
5085  return TruncOp; // x iff x size == aext size.
5086  if (TruncOp.getValueType().bitsGT(VT))
5087  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
5088  return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
5089  }
5090 
5091  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
5092  // if the trunc is not free.
5093  if (N0.getOpcode() == ISD::AND &&
5094  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
5095  N0.getOperand(1).getOpcode() == ISD::Constant &&
5096  !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
5097  N0.getValueType())) {
5098  SDValue X = N0.getOperand(0).getOperand(0);
      // Bring x to the result type VT before applying the mask.
5099  if (X.getValueType().bitsLT(VT)) {
5100  X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
5101  } else if (X.getValueType().bitsGT(VT)) {
5102  X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
5103  }
      // Zero-extend the mask constant to the width of VT.
5104  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
5105  Mask = Mask.zext(VT.getSizeInBits());
5106  return DAG.getNode(ISD::AND, SDLoc(N), VT,
5107  X, DAG.getConstant(Mask, VT));
5108  }
5109 
5110  // fold (aext (load x)) -> (aext (truncate (extload x)))
5111  // None of the supported targets knows how to perform load and any_ext
5112  // on vectors in one instruction. We only perform this transformation on
5113  // scalars.
5114  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
5115  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
5116  TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
5117  bool DoXform = true;
5118  SmallVector<SDNode*, 4> SetCCs;
      // A load with other users is only transformed when
      // ExtendUsesToFormExtLoad approves; it also fills in SetCCs.
5119  if (!N0.hasOneUse())
5120  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
5121  if (DoXform) {
5122  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5123  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
5124  LN0->getChain(),
5125  LN0->getBasePtr(), N0.getValueType(),
5126  LN0->getMemOperand());
5127  CombineTo(N, ExtLoad);
      // Remaining users of the original load get a truncate of the extload;
      // users of its chain get the new load's chain (value 1).
5128  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
5129  N0.getValueType(), ExtLoad);
5130  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
5131  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
5132  ISD::ANY_EXTEND);
5133  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5134  }
5135  }
5136 
5137  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
5138  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
5139  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
      // NOTE(review): the listing jumps from line 5140 to 5142, so part of this
      // condition is missing from the excerpt - presumably the check that N0 is
      // an extending, unindexed load. Confirm against the real source file.
5140  if (N0.getOpcode() == ISD::LOAD &&
5142  N0.hasOneUse()) {
5143  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5144  EVT MemVT = LN0->getMemoryVT();
      // Re-create the load with the same extension kind and memory type, but
      // producing the wider result type VT.
5145  SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N),
5146  VT, LN0->getChain(), LN0->getBasePtr(),
5147  MemVT, LN0->getMemOperand());
5148  CombineTo(N, ExtLoad);
5149  CombineTo(N0.getNode(),
5150  DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
5151  N0.getValueType(), ExtLoad),
5152  ExtLoad.getValue(1));
5153  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5154  }
5155 
5156  if (N0.getOpcode() == ISD::SETCC) {
5157  // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
5158  // Only do this before legalize for now.
5159  if (VT.isVector() && !LegalOperations) {
5160  EVT N0VT = N0.getOperand(0).getValueType();
5161  // We know that the # elements of the results is the same as the
5162  // # elements of the compare (and the # elements of the compare result
5163  // for that matter). Check to see that they are the same size. If so,
5164  // we know that the element size of the extended result matches the
5165  // element size of the compare operands.
5166  if (VT.getSizeInBits() == N0VT.getSizeInBits())
5167  return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
5168  N0.getOperand(1),
5169  cast<CondCodeSDNode>(N0.getOperand(2))->get());
5170  // If the desired elements are smaller or larger than the source
5171  // elements we can use a matching integer vector type and then
5172  // truncate/sign extend
5173  else {
5174  EVT MatchingElementType =
5175  EVT::getIntegerVT(*DAG.getContext(),
5176  N0VT.getScalarType().getSizeInBits());
5177  EVT MatchingVectorType =
5178  EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
5179  N0VT.getVectorNumElements());
5180  SDValue VsetCC =
5181  DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
5182  N0.getOperand(1),
5183  cast<CondCodeSDNode>(N0.getOperand(2))->get());
5184  return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
5185  }
5186  }
5187 
5188  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
5189  SDValue SCC =
5190  SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
5191  DAG.getConstant(1, VT), DAG.getConstant(0, VT),
5192  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
5193  if (SCC.getNode())
5194  return SCC;
5195  }
5196 
      // No fold applied.
5197  return SDValue();
5198 }
5199 
5200 /// GetDemandedBits - See if the specified operand can be simplified with the
5201 /// knowledge that only the bits specified by Mask are used. If so, return the
5202 /// simpler operand, otherwise return a null SDValue.
5203 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
5204  switch (V.getOpcode()) {
5205  default: break;
5206  case ISD::Constant: {
5207  const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
5208  assert(CV != 0 && "Const value should be ConstSDNode.");
5209  const APInt &CVal = CV->getAPIntValue();
5210  APInt NewVal = CVal & Mask;
5211  if (NewVal != CVal)
5212  return DAG.getConstant(NewVal, V.getValueType());
5213  break;
5214  }
5215  case ISD::OR:
5216  case ISD::XOR:
5217  // If the LHS or RHS don't contribute bits to the or, drop them.
5218  if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
5219  return V.getOperand(1);
5220  if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
5221  return V.getOperand(0);
5222  break;
5223  case ISD::SRL:
5224  // Only look at single-use SRLs.
5225  if (!V.getNode()->hasOneUse())
5226  break;
5227  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
5228  // See if we can recursively simplify the LHS.
5229  unsigned Amt = RHSC->getZExtValue();
5230 
5231  // Watch out for shift count overflow though.
5232  if (Amt >= Mask.getBitWidth()) break;
5233  APInt NewMask = Mask << Amt;
5234  SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
5235  if (SimplifyLHS.getNode())
5236  return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
5237  SimplifyLHS, V.getOperand(1));
5238  }
5239  }
5240  return SDValue();
5241 }
5242 
5243 /// ReduceLoadWidth - If the result of a wider load is shifted right by N
5244 /// bits and then truncated to a narrower type, and N is a multiple of the
5245 /// number of bits of the narrower type, transform it to a narrower load
5246 /// from address + N / 8 (adjusted on big-endian targets). If the result is
5247 /// to be extended, also fold the extension to form an extending load.
/// N may be a truncating node, a SIGN_EXTEND_INREG or an SRL. Returns the new
/// value, or a null SDValue when the load cannot be narrowed.
5248 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
5249  unsigned Opc = N->getOpcode();
5250 
      // NOTE(review): the listing jumps from line 5250 to 5252; the declaration
      // of ExtType (assigned and read below) is not visible in this excerpt.
5252  SDValue N0 = N->getOperand(0);
5253  EVT VT = N->getValueType(0);
5254  EVT ExtVT = VT;
5255 
5256  // This transformation isn't valid for vector loads.
5257  if (VT.isVector())
5258  return SDValue();
5259 
5260  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
5261  // extended to VT.
5262  if (Opc == ISD::SIGN_EXTEND_INREG) {
5263  ExtType = ISD::SEXTLOAD;
5264  ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
5265  } else if (Opc == ISD::SRL) {
5266  // Another special-case: SRL is basically zero-extending a narrower value.
5267  ExtType = ISD::ZEXTLOAD;
5268  N0 = SDValue(N, 0);
      // NOTE(review): listing line 5269 is missing; N01 is used below without
      // a visible declaration - presumably the constant shift amount of the
      // SRL. Confirm against the real source file.
5270  if (!N01) return SDValue();
      // The narrow type holds the bits that survive the right shift.
5271  ExtVT = EVT::getIntegerVT(*DAG.getContext(),
5272  VT.getSizeInBits() - N01->getZExtValue());
5273  }
5274  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
5275  return SDValue();
5276 
5277  unsigned EVTBits = ExtVT.getSizeInBits();
5278 
5279  // Do not generate loads of non-round integer types since these can
5280  // be expensive (and would be wrong if the type is not byte sized).
5281  if (!ExtVT.isRound())
5282  return SDValue();
5283 
5284  unsigned ShAmt = 0;
5285  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5286  if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5287  ShAmt = N01->getZExtValue();
5288  // Is the shift amount a multiple of the size of ExtVT (EVTBits)?
      // (The mask test presumably relies on EVTBits being a power of two,
      // which the isRound() check above is assumed to guarantee.)
5289  if ((ShAmt & (EVTBits-1)) == 0) {
5290  N0 = N0.getOperand(0);
5291  // Is the load width a multiple of the size of ExtVT (EVTBits)?
5292  if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
5293  return SDValue();
5294  }
5295 
5296  // At this point, we must have a load or else we can't do the transform.
5297  if (!isa<LoadSDNode>(N0)) return SDValue();
5298 
5299  // Because a SRL must be assumed to *need* to zero-extend the high bits
5300  // (as opposed to anyext the high bits), we can't combine the zextload
5301  // lowering of SRL and an sextload.
5302  if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
5303  return SDValue();
5304 
5305  // If the shift amount is larger than the input type then we're not
5306  // accessing any of the loaded bytes. If the load was a zextload/extload
5307  // then the result of the shift+trunc is zero/undef (handled elsewhere).
5308  if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
5309  return SDValue();
5310  }
5311  }
5312 
5313  // If the load is shifted left (and the result isn't shifted back right),
5314  // we can fold the truncate through the shift.
5315  unsigned ShLeftAmt = 0;
5316  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
5317  ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
5318  if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5319  ShLeftAmt = N01->getZExtValue();
5320  N0 = N0.getOperand(0);
5321  }
5322  }
5323 
5324  // If we haven't found a load, we can't narrow it. Don't transform one with
5325  // multiple uses, this would require adding a new load.
5326  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
5327  return SDValue();
5328 
5329  // Don't change the width of a volatile load.
5330  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5331  if (LN0->isVolatile())
5332  return SDValue();
5333 
5334  // Verify that we are actually reducing a load width here.
5335  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
5336  return SDValue();
5337 
5338  // For the transform to be legal, the load must produce only two values
5339  // (the value loaded and the chain). Don't transform a pre-increment
5340  // load, for example, which produces an extra value. Otherwise the
5341  // transformation is not equivalent, and the downstream logic to replace
5342  // uses gets things wrong.
5343  if (LN0->getNumValues() > 2)
5344  return SDValue();
5345 
5346  // If the load that we're shrinking is an extload and we're not just
5347  // discarding the extension we can't simply shrink the load. Bail.
5348  // TODO: It would be possible to merge the extensions in some cases.
5349  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
5350  LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
5351  return SDValue();
5352 
5353  EVT PtrType = N0.getOperand(1).getValueType();
5354 
5355  if (PtrType == MVT::Untyped || PtrType.isExtended())
5356  // It's not possible to generate a constant of extended or untyped type.
5357  return SDValue();
5358 
5359  // For big endian targets, we need to adjust the offset to the pointer to
5360  // load the correct bytes.
5361  if (TLI.isBigEndian()) {
5362  unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
5363  unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
5364  ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
5365  }
5366 
      // Turn the bit offset into a byte offset and build the adjusted address;
      // the new alignment is derived from the old one and that offset.
5367  uint64_t PtrOff = ShAmt / 8;
5368  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
5369  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
5370  PtrType, LN0->getBasePtr(),
5371  DAG.getConstant(PtrOff, PtrType));
5372  AddToWorkList(NewPtr.getNode());
5373 
      // Emit a plain load when no extension is required, otherwise an
      // extending load from the narrow memory type ExtVT.
5374  SDValue Load;
5375  if (ExtType == ISD::NON_EXTLOAD)
5376  Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
5377  LN0->getPointerInfo().getWithOffset(PtrOff),
5378  LN0->isVolatile(), LN0->isNonTemporal(),
5379  LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
5380  else
5381  Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
5382  LN0->getPointerInfo().getWithOffset(PtrOff),
5383  ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
5384  NewAlign, LN0->getTBAAInfo());
5385 
5386  // Replace the old load's chain with the new load's chain.
5387  WorkListRemover DeadNodes(*this);
5388  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
5389 
5390  // Shift the result left, if we've swallowed a left shift.
5391  SDValue Result = Load;
5392  if (ShLeftAmt != 0) {
      // Fall back to VT for the shift-amount constant when the preferred
      // shift-amount type cannot represent ShLeftAmt.
5393  EVT ShImmTy = getShiftAmountTy(Result.getValueType());
5394  if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
5395  ShImmTy = VT;
5396  // If the shift amount is as large as the result size (but, presumably,
5397  // no larger than the source) then the useful bits of the result are
5398  // zero; we can't simply return the shortened shift, because the result
5399  // of that operation is undefined.
5400  if (ShLeftAmt >= VT.getSizeInBits())
5401  Result = DAG.getConstant(0, VT);
5402  else
5403  Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
5404  Result, DAG.getConstant(ShLeftAmt, ShImmTy));
5405  }
5406 
5407  // Return the new loaded value.
5408  return Result;
5409 }
5410 
5411 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
5412  SDValue N0 = N->getOperand(0);
5413  SDValue N1 = N->getOperand(1);
5414  EVT VT = N->getValueType(0);
5415  EVT EVT = cast<VTSDNode>(N1)->getVT();
5416  unsigned VTBits = VT.getScalarType().getSizeInBits();
5417  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
5418 
5419  // fold (sext_in_reg c1) -> c1
5420  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
5421  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
5422 
5423  // If the input is already sign extended, just drop the extension.
5424  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
5425  return N0;
5426 
5427  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
5428  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5429  EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
5430  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5431  N0.getOperand(0), N1);
5432 
5433  // fold (sext_in_reg (sext x)) -> (sext x)
5434  // fold (sext_in_reg (aext x)) -> (sext x)
5435  // if x is small enough.
5436  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
5437  SDValue N00 = N0.getOperand(0);
5438  if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
5439  (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
5440  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
5441  }
5442 
5443  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
5444  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
5445  return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
5446 
5447  // fold operands of sext_in_reg based on knowledge that the top bits are not
5448  // demanded.
5449  if (SimplifyDemandedBits(SDValue(N, 0)))
5450  return SDValue(N, 0);
5451 
5452  // fold (sext_in_reg (load x)) -> (smaller sextload x)
5453  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
5454  SDValue NarrowLoad = ReduceLoadWidth(N);
5455  if (NarrowLoad.getNode())
5456  return NarrowLoad;
5457 
5458  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
5459  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
5460  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
5461  if (N0.getOpcode() == ISD::SRL) {
5462  if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
5463  if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
5464  // We can turn this into an SRA iff the input to the SRL is already sign
5465  // extended enough.
5466  unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
5467  if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
5468  return DAG.getNode(ISD::SRA, SDLoc(N), VT,
5469  N0.getOperand(0), N0.getOperand(1));
5470  }
5471  }
5472 
5473  // fold (sext_inreg (extload x)) -> (sextload x)
5474  if (ISD::isEXTLoad(N0.getNode()) &&
5475  ISD::isUNINDEXEDLoad(N0.getNode()) &&
5476  EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
5477  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
5478  TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
5479  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5480  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
5481  LN0->getChain(),
5482  LN0->getBasePtr(), EVT,
5483  LN0->getMemOperand());
5484  CombineTo(N, ExtLoad);
5485  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5486  AddToWorkList(ExtLoad.getNode());
5487  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5488  }
5489  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
5490  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
5491  N0.hasOneUse() &&
5492  EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
5493  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
5494  TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
5495  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5496  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
5497  LN0->getChain(),
5498  LN0->getBasePtr(), EVT,
5499  LN0->getMemOperand());
5500  CombineTo(N, ExtLoad);
5501  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5502  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5503  }
5504 
5505  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
5506  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
5507  SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5508  N0.getOperand(1), false);
5509  if (BSwap.getNode() != 0)
5510  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
5511  BSwap, N1);
5512  }
5513 
5514  return SDValue();
5515 }
5516 
/// visitTRUNCATE - Try to simplify a TRUNCATE node. Returns the replacement
/// value, or a null SDValue when none of the folds below applies.
5517 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
5518  SDValue N0 = N->getOperand(0);
5519  EVT VT = N->getValueType(0);
5520  bool isLE = TLI.isLittleEndian();
5521 
5522  // noop truncate
5523  if (N0.getValueType() == N->getValueType(0))
5524  return N0;
5525  // fold (truncate c1) -> c1
5526  if (isa<ConstantSDNode>(N0))
5527  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
5528  // fold (truncate (truncate x)) -> (truncate x)
5529  if (N0.getOpcode() == ISD::TRUNCATE)
5530  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
5531  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
5532  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
5533  N0.getOpcode() == ISD::SIGN_EXTEND ||
5534  N0.getOpcode() == ISD::ANY_EXTEND) {
5535  if (N0.getOperand(0).getValueType().bitsLT(VT))
5536  // if the source is smaller than the dest, we still need an extend
5537  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
5538  N0.getOperand(0));
5539  if (N0.getOperand(0).getValueType().bitsGT(VT))
5540  // if the source is larger than the dest, then we just need the truncate
5541  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
5542  // if the source and dest are the same type, we can drop both the extend
5543  // and the truncate.
5544  return N0.getOperand(0);
5545  }
5546 
5547  // Fold extract-and-trunc into a narrow extract. For example:
5548  // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
5549  // i32 y = TRUNCATE(i64 x)
5550  // -- becomes --
5551  // v4i32 b = BITCAST (v2i64 val)
5552  // i32 y = EXTRACT_VECTOR_ELT(v4i32 b, i32 2) (index 3 on big-endian)
5553  //
5554  // Note: We only run this optimization after type legalization (which often
5555  // creates this pattern) and before operation legalization after which
5556  // we need to be more careful about the vector instructions that we generate.
5557  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5558  LegalTypes && !LegalOperations && N0->hasOneUse()) {
5559 
5560  EVT VecTy = N0.getOperand(0).getValueType();
5561  EVT ExTy = N0.getValueType();
5562  EVT TrTy = N->getValueType(0);
5563 
5564  unsigned NumElem = VecTy.getVectorNumElements();
5565  unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
5566 
      // NVT reinterprets the source vector as SizeRatio times as many elements
      // of the truncated type.
5567  EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
5568  assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
5569 
5570  SDValue EltNo = N0->getOperand(1);
5571  if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
5572  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
5573  EVT IndexTy = TLI.getVectorIdxTy();
      // Pick the narrow element holding the low bits of the wide element: the
      // first sub-element on little-endian, the last one on big-endian.
5574  int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
5575 
5576  SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
5577  NVT, N0.getOperand(0));
5578 
5579  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
5580  SDLoc(N), TrTy, V,
5581  DAG.getConstant(Index, IndexTy));
5582  }
5583  }
5584 
5585  // Fold a series of buildvector, bitcast, and truncate if possible.
5586  // For example fold
5587  // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
5588  // (2xi32 (buildvector x, y)).
5589  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
5590  N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
5591  N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
5592  N0.getOperand(0).hasOneUse()) {
5593 
5594  SDValue BuildVect = N0.getOperand(0);
5595  EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
5596  EVT TruncVecEltTy = VT.getVectorElementType();
5597 
5598  // Check that the element types match.
5599  if (BuildVectEltTy == TruncVecEltTy) {
5600  // Now we only need to compute the offset of the truncated elements.
5601  unsigned BuildVecNumElts = BuildVect.getNumOperands();
5602  unsigned TruncVecNumElts = VT.getVectorNumElements();
5603  unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
5604 
5605  assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
5606  "Invalid number of elements");
5607 
      // NOTE(review): listing line 5608 is missing; Opnds is used below without
      // a visible declaration in this excerpt.
      // Keep every TruncEltOffset-th build_vector operand.
5609  for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
5610  Opnds.push_back(BuildVect.getOperand(i));
5611 
5612  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
5613  Opnds.size());
5614  }
5615  }
5616 
5617  // See if we can simplify the input to this truncate through knowledge that
5618  // only the low bits are being used.
5619  // For example "trunc (or (shl x, 8), y)" // -> trunc y
5620  // Currently we only perform this optimization on scalars because vectors
5621  // may have different active low bits.
5622  if (!VT.isVector()) {
5623  SDValue Shorter =
5624  GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
5625  VT.getSizeInBits()));
5626  if (Shorter.getNode())
5627  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
5628  }
5629  // fold (truncate (load x)) -> (smaller load x)
5630  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
5631  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
5632  SDValue Reduced = ReduceLoadWidth(N);
5633  if (Reduced.getNode())
5634  return Reduced;
5635  }
5636  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
5637  // where ... are all 'undef'.
5638  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
5639  SmallVector<EVT, 8> VTs;
5640  SDValue V;
5641  unsigned Idx = 0;
5642  unsigned NumDefs = 0;
5643 
      // Scan the concat operands, remembering the last non-undef one (V, at
      // position Idx) and collecting a truncated vector type per operand.
5644  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
5645  SDValue X = N0.getOperand(i);
5646  if (X.getOpcode() != ISD::UNDEF) {
5647  V = X;
5648  Idx = i;
5649  NumDefs++;
5650  }
5651  // Stop if more than one member is non-undef.
5652  if (NumDefs > 1)
5653  break;
5654  VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
5655  VT.getVectorElementType(),
      // NOTE(review): listing line 5656 (the remaining argument and the closing
      // of this call) is missing from the excerpt.
5657  }
5658 
5659  if (NumDefs == 0)
5660  return DAG.getUNDEF(VT);
5661 
5662  if (NumDefs == 1) {
5663  assert(V.getNode() && "The single defined operand is empty!");
      // NOTE(review): listing line 5664 is missing; Opnds is used below without
      // a visible declaration in this excerpt.
      // Rebuild the concat: undef everywhere except a truncate of V at Idx.
5665  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
5666  if (i != Idx) {
5667  Opnds.push_back(DAG.getUNDEF(VTs[i]));
5668  continue;
5669  }
5670  SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
5671  AddToWorkList(NV.getNode());
5672  Opnds.push_back(NV);
5673  }
5674  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
5675  &Opnds[0], Opnds.size());
5676  }
5677  }
5678 
5679  // Simplify the operands using demanded-bits information.
      // NOTE(review): listing line 5681 (the second half of this condition) is
      // missing from the excerpt.
5680  if (!VT.isVector() &&
5682  return SDValue(N, 0);
5683 
5684  return SDValue();
5685 }
5686 
5687 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
5688  SDValue Elt = N->getOperand(i);
5689  if (Elt.getOpcode() != ISD::MERGE_VALUES)
5690  return Elt.getNode();
5691  return Elt.getOperand(Elt.getResNo()).getNode();
5692 }
5693 
5694 /// CombineConsecutiveLoads - build_pair (load, load) -> load
5695 /// if load locations are consecutive.
/// N must be a BUILD_PAIR node and VT is the type of the combined load.
/// Returns the single wide load, or a null SDValue when the two halves cannot
/// be merged.
5696 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
5697  assert(N->getOpcode() == ISD::BUILD_PAIR);
5698 
      // NOTE(review): listing lines 5699-5700 are missing; LD1 and LD2 are used
      // below without visible declarations - presumably the loads feeding the
      // two BUILD_PAIR operands. Confirm against the real source file.
      // Bail out unless both halves exist, the first is a single-use
      // non-extending load, and both live in the same address space.
5701  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
5702  LD1->getPointerInfo().getAddrSpace() !=
5703  LD2->getPointerInfo().getAddrSpace())
5704  return SDValue();
5705  EVT LD1VT = LD1->getValueType(0);
5706 
5707  if (ISD::isNON_EXTLoad(LD2) &&
5708  LD2->hasOneUse() &&
5709  // If both are volatile this would reduce the number of volatile loads.
5710  // If one is volatile it might be ok, but play conservative and bail out.
5711  !LD1->isVolatile() &&
5712  !LD2->isVolatile() &&
5713  DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
5714  unsigned Align = LD1->getAlignment();
5715  unsigned NewAlign = TLI.getDataLayout()->
5716  getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
5717 
      // Only merge when the first load is already aligned at least as strictly
      // as the ABI requires for VT, and a VT load is allowed at this stage.
5718  if (NewAlign <= Align &&
5719  (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
5720  return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
5721  LD1->getBasePtr(), LD1->getPointerInfo(),
5722  false, false, false, Align);
5723  }
5724 
5725  return SDValue();
5726 }
5727 
/// visitBITCAST - Try to simplify a BITCAST node. Returns the replacement
/// value, or a null SDValue when none of the folds below applies.
5728 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
5729  SDValue N0 = N->getOperand(0);
5730  EVT VT = N->getValueType(0);
5731 
5732  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
5733  // Only do this before legalize, since afterward the target may be depending
5734  // on the bitconvert.
5735  // First check to see if this is all constant.
5736  if (!LegalTypes &&
5737  N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
5738  VT.isVector()) {
      // "Simple" means every operand is undef, an integer constant or an FP
      // constant.
5739  bool isSimple = true;
5740  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
5741  if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
5742  N0.getOperand(i).getOpcode() != ISD::Constant &&
5743  N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
5744  isSimple = false;
5745  break;
5746  }
5747 
5748  EVT DestEltVT = N->getValueType(0).getVectorElementType();
5749  assert(!DestEltVT.isVector() &&
5750  "Element type of vector ValueType must not be vector!");
5751  if (isSimple)
5752  return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
5753  }
5754 
5755  // If the input is a constant, let getNode fold it.
5756  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
5757  SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
      // getNode handing back N itself means nothing was folded.
5758  if (Res.getNode() != N) {
5759  if (!LegalOperations ||
5760  TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
5761  return Res;
5762 
5763  // Folding it resulted in an illegal node, and it's too late to
5764  // do that. Clean up the old node and forgo the transformation.
5765  // Ideally this won't happen very often, because instcombine
5766  // and the earlier dagcombine runs (where illegal nodes are
5767  // permitted) should have folded most of them already.
5768  DAG.DeleteNode(Res.getNode());
5769  }
5770  }
5771 
5772  // (conv (conv x, t1), t2) -> (conv x, t2)
5773  if (N0.getOpcode() == ISD::BITCAST)
5774  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
5775  N0.getOperand(0));
5776 
5777  // fold (conv (load x)) -> (load (conv*)x)
5778  // If the resultant load doesn't need a higher alignment than the original!
5779  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
5780  // Do not change the width of a volatile load.
5781  !cast<LoadSDNode>(N0)->isVolatile() &&
5782  (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
5783  TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
5784  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5785  unsigned Align = TLI.getDataLayout()->
5786  getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
5787  unsigned OrigAlign = LN0->getAlignment();
5788 
      // Align is the ABI alignment of the new type; the load is only retyped
      // when the original load already satisfies it.
5789  if (Align <= OrigAlign) {
5790  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
5791  LN0->getBasePtr(), LN0->getPointerInfo(),
5792  LN0->isVolatile(), LN0->isNonTemporal(),
5793  LN0->isInvariant(), OrigAlign,
5794  LN0->getTBAAInfo());
5795  AddToWorkList(N);
      // Replace the old load with a bitcast of the new load back to the old
      // type, and its chain with the new load's chain (value 1).
5796  CombineTo(N0.getNode(),
5797  DAG.getNode(ISD::BITCAST, SDLoc(N0),
5798  N0.getValueType(), Load),
5799  Load.getValue(1));
5800  return Load;
5801  }
5802  }
5803 
5804  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
5805  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
5806  // This often reduces constant pool loads.
5807  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
5808  (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
5809  N0.getNode()->hasOneUse() && VT.isInteger() &&
5810  !VT.isVector() && !N0.getValueType().isVector()) {
5811  SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
5812  N0.getOperand(0));
5813  AddToWorkList(NewConv.getNode());
5814 
5815  APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
5816  if (N0.getOpcode() == ISD::FNEG)
5817  return DAG.getNode(ISD::XOR, SDLoc(N), VT,
5818  NewConv, DAG.getConstant(SignBit, VT));
5819  assert(N0.getOpcode() == ISD::FABS);
5820  return DAG.getNode(ISD::AND, SDLoc(N), VT,
5821  NewConv, DAG.getConstant(~SignBit, VT));
5822  }
5823 
5824  // fold (bitconvert (fcopysign cst, x)) ->
5825  // (or (and (bitconvert x), sign), (and cst, (not sign)))
5826  // Note that we don't handle (copysign x, cst) because this can always be
5827  // folded to an fneg or fabs.
5828  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
5829  isa<ConstantFPSDNode>(N0.getOperand(0)) &&
5830  VT.isInteger() && !VT.isVector()) {
      // Work on x as an integer of its own width, provided that type is legal.
5831  unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
5832  EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
5833  if (isTypeLegal(IntXVT)) {
5834  SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
5835  IntXVT, N0.getOperand(1));
5836  AddToWorkList(X.getNode());
5837 
5838  // If X has a different width than the result/lhs, sext it or truncate it.
5839  unsigned VTWidth = VT.getSizeInBits();
5840  if (OrigXWidth < VTWidth) {
5841  X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
5842  AddToWorkList(X.getNode());
5843  } else if (OrigXWidth > VTWidth) {
5844  // To get the sign bit in the right place, we have to shift it right
5845  // before truncating.
5846  X = DAG.getNode(ISD::SRL, SDLoc(X),
5847  X.getValueType(), X,
5848  DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
5849  AddToWorkList(X.getNode());
5850  X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
5851  AddToWorkList(X.getNode());
5852  }
5853 
      // Keep only the sign bit of x ...
5854  APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
5855  X = DAG.getNode(ISD::AND, SDLoc(X), VT,
5856  X, DAG.getConstant(SignBit, VT));
5857  AddToWorkList(X.getNode());
5858 
      // ... and every bit except the sign bit of the constant, then merge.
5859  SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
5860  VT, N0.getOperand(0));
5861  Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
5862  Cst, DAG.getConstant(~SignBit, VT));
5863  AddToWorkList(Cst.getNode());
5864 
5865  return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
5866  }
5867  }
5868 
5869  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
5870  if (N0.getOpcode() == ISD::BUILD_PAIR) {
5871  SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
5872  if (CombineLD.getNode())
5873  return CombineLD;
5874  }
5875 
5876  return SDValue();
5877 }
5878 
/// visitBUILD_PAIR - Combine hook for a BUILD_PAIR node. Forwards the node and
/// the type of its first result to CombineConsecutiveLoads and returns that
/// helper's result unchanged (presumably an empty SDValue when the pair is not
/// made of consecutive loads -- confirm against the helper).
5879 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
5880  EVT VT = N->getValueType(0);
5881  return CombineConsecutiveLoads(N, VT);
5882 }
5883 
5884 /// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
5885 /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
5886 /// destination element value type.
///
/// Returns a node whose elements have type DstEltVT. Four cases are handled:
///  - same element type: BV itself is returned;
///  - same element width: every element is bitcast individually;
///  - FP source or destination with a different width: recurse through an
///    integer element type of matching width first;
///  - integer elements of different widths: constants are merged (growing)
///    or split (shrinking), taking target endianness into account.
///
/// NOTE(review): the declarations of the local 'Ops' container are on lines
/// missing from this listing (numbering skips 5910, 5954 and 5988), as is the
/// second line of the getVectorVT call at 5900 -- confirm against the real
/// source before editing.
5887 SDValue DAGCombiner::
5888 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
5889  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
5890 
5891  // If this is already the right type, we're done.
5892  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
5893 
5894  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
5895  unsigned DstBitSize = DstEltVT.getSizeInBits();
5896 
5897  // If this is a conversion of N elements of one type to N elements of another
5898  // type, convert each element. This handles FP<->INT cases.
5899  if (SrcBitSize == DstBitSize) {
5900  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
5902 
5903  // Due to the FP element handling below calling this routine recursively,
5904  // we can end up with a scalar-to-vector node here.
5905  if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
5906  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
5907  DAG.getNode(ISD::BITCAST, SDLoc(BV),
5908  DstEltVT, BV->getOperand(0)));
5909 
5911  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
5912  SDValue Op = BV->getOperand(i);
5913  // If the vector element type is not legal, the BUILD_VECTOR operands
5914  // are promoted and implicitly truncated. Make that explicit here.
5915  if (Op.getValueType() != SrcEltVT)
5916  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
5917  Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
5918  DstEltVT, Op));
      // Queue the new per-element bitcast so the combiner visits it too.
5919  AddToWorkList(Ops.back().getNode());
5920  }
5921  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
5922  &Ops[0], Ops.size());
5923  }
5924 
5925  // Otherwise, we're growing or shrinking the elements. To avoid having to
5926  // handle annoying details of growing/shrinking FP values, we convert them to
5927  // int first.
5928  if (SrcEltVT.isFloatingPoint()) {
5929  // Convert the input float vector to a int vector where the elements are the
5930  // same sizes.
5931  assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
5932  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
      // From here on BV refers to the integer-typed copy of the input.
5933  BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
5934  SrcEltVT = IntVT;
5935  }
5936 
5937  // Now we know the input is an integer vector. If the output is a FP type,
5938  // convert to integer first, then to FP of the right size.
5939  if (DstEltVT.isFloatingPoint()) {
5940  assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
5941  EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
5942  SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
5943 
5944  // Next, convert to FP elements of the same size.
5945  return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
5946  }
5947 
5948  // Okay, we know the src/dst types are both integers of differing types.
5949  // Handling growing first.
5950  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
5951  if (SrcBitSize < DstBitSize) {
5952  unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
5953 
      // Each group of NumInputsPerOutput consecutive inputs becomes one output.
5955  for (unsigned i = 0, e = BV->getNumOperands(); i != e;
5956  i += NumInputsPerOutput) {
5957  bool isLE = TLI.isLittleEndian();
5958  APInt NewBits = APInt(DstBitSize, 0);
5959  bool EltIsUndef = true;
5960  for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
5961  // Shift the previously computed bits over.
5962  NewBits <<= SrcBitSize;
      // The input read first ends up in the most significant bits, so on
      // little-endian targets the group is walked from its last input back.
5963  SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
      // An undef input contributes zero bits; the output is only undef when
      // every input of the group is undef (see EltIsUndef).
5964  if (Op.getOpcode() == ISD::UNDEF) continue;
5965  EltIsUndef = false;
5966 
5967  NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
5968  zextOrTrunc(SrcBitSize).zext(DstBitSize);
5969  }
5970 
5971  if (EltIsUndef)
5972  Ops.push_back(DAG.getUNDEF(DstEltVT));
5973  else
5974  Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
5975  }
5976 
5977  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
5978  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
5979  &Ops[0], Ops.size());
5980  }
5981 
5982  // Finally, this must be the case where we are shrinking elements: each input
5983  // turns into multiple outputs.
5984  bool isS2V = ISD::isScalarToVector(BV);
5985  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
5986  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
5987  NumOutputsPerInput*BV->getNumOperands());
5989 
5990  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      // An undef input expands to NumOutputsPerInput undef outputs.
5991  if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
5992  for (unsigned j = 0; j != NumOutputsPerInput; ++j)
5993  Ops.push_back(DAG.getUNDEF(DstEltVT));
5994  continue;
5995  }
5996 
5997  APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
5998  getAPIntValue().zextOrTrunc(SrcBitSize);
5999 
      // Peel DstBitSize-bit pieces off the low end of OpVal, lowest first.
6000  for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
6001  APInt ThisVal = OpVal.trunc(DstBitSize);
6002  Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      // For a scalar_to_vector input whose value fits entirely in the first
      // piece (the zero-extended piece equals the whole value), stop early.
6003  if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
6004  // Simply turn this into a SCALAR_TO_VECTOR of the new type.
6005  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
6006  Ops[0]);
6007  OpVal = OpVal.lshr(DstBitSize);
6008  }
6009 
6010  // For big endian targets, swap the order of the pieces of each element.
6011  if (TLI.isBigEndian())
6012  std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
6013  }
6014 
6015  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
6016  &Ops[0], Ops.size());
6017 }
6018 
/// visitFADD - Try to simplify the FADD node N. Returns the replacement value
/// when one of the folds below applies, or an empty SDValue() when none does.
/// Several folds are only attempted when Options.UnsafeFPMath is set.
///
/// NOTE(review): N0CFP/N1CFP are declared on lines missing from this listing
/// (numbering skips 6022-6023); presumably they are N0/N1 viewed as
/// ConstantFPSDNode and null when the operand is not an FP constant. The same
/// holds for CFP00/CFP01 (6089-6090), CFP10/CFP11 (6134-6135), CFP (6180) and
/// the second CFP10 (6189) -- confirm against the real source.
6019 SDValue DAGCombiner::visitFADD(SDNode *N) {
6020  SDValue N0 = N->getOperand(0);
6021  SDValue N1 = N->getOperand(1);
6024  EVT VT = N->getValueType(0);
6025 
6026  // fold vector ops
6027  if (VT.isVector()) {
6028  SDValue FoldedVOp = SimplifyVBinOp(N);
6029  if (FoldedVOp.getNode()) return FoldedVOp;
6030  }
6031 
6032  // fold (fadd c1, c2) -> c1 + c2
      // (The node is simply re-requested from the DAG; presumably getNode does
      // the actual constant folding -- confirm.)
6033  if (N0CFP && N1CFP)
6034  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
6035  // canonicalize constant to RHS
6036  if (N0CFP && !N1CFP)
6037  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
6038  // fold (fadd A, 0) -> A
6039  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
6040  N1CFP->getValueAPF().isZero())
6041  return N0;
6042  // fold (fadd A, (fneg B)) -> (fsub A, B)
      // Only taken when isNegatibleForFree reports 2 for the operand.
6043  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
6044  isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
6045  return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
6046  GetNegatedExpression(N1, DAG, LegalOperations));
6047  // fold (fadd (fneg A), B) -> (fsub B, A)
6048  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
6049  isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
6050  return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
6051  GetNegatedExpression(N0, DAG, LegalOperations));
6052 
6053  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
6054  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
6055  N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
6056  isa<ConstantFPSDNode>(N0.getOperand(1)))
6057  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
6058  DAG.getNode(ISD::FADD, SDLoc(N), VT,
6059  N0.getOperand(1), N1));
6060 
6061  // No FP constant should be created after legalization as Instruction
6062  // Selection pass has hard time in dealing with FP constant.
6063  //
6064  // We don't need test this condition for transformation like following, as
6065  // the DAG being transformed implies it is legal to take FP constant as
6066  // operand.
6067  //
6068  // (fadd (fmul c, x), x) -> (fmul c+1, x)
6069  //
6070  bool AllowNewFpConst = (Level < AfterLegalizeDAG);
6071 
6072  // If allow, fold (fadd (fneg x), x) -> 0.0
6073  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
6074  N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
6075  return DAG.getConstantFP(0.0, VT);
6076 
6077  // If allow, fold (fadd x, (fneg x)) -> 0.0
6078  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
6079  N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
6080  return DAG.getConstantFP(0.0, VT);
6081 
6082  // In unsafe math mode, we can fold chains of FADD's of the same value
6083  // into multiplications. This transform is not safe in general because
6084  // we are reducing the number of rounding steps.
6085  if (DAG.getTarget().Options.UnsafeFPMath &&
6086  TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
6087  !N0CFP && !N1CFP) {
6088  if (N0.getOpcode() == ISD::FMUL) {
6091 
6092  // (fadd (fmul c, x), x) -> (fmul x, c+1)
6093  if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
6094  SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
6095  SDValue(CFP00, 0),
6096  DAG.getConstantFP(1.0, VT));
6097  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6098  N1, NewCFP);
6099  }
6100 
6101  // (fadd (fmul x, c), x) -> (fmul x, c+1)
6102  if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
6103  SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
6104  SDValue(CFP01, 0),
6105  DAG.getConstantFP(1.0, VT));
6106  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6107  N1, NewCFP);
6108  }
6109 
6110  // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2)
6111  if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
6112  N1.getOperand(0) == N1.getOperand(1) &&
6113  N0.getOperand(1) == N1.getOperand(0)) {
6114  SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
6115  SDValue(CFP00, 0),
6116  DAG.getConstantFP(2.0, VT));
6117  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6118  N0.getOperand(1), NewCFP);
6119  }
6120 
6121  // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
6122  if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
6123  N1.getOperand(0) == N1.getOperand(1) &&
6124  N0.getOperand(0) == N1.getOperand(0)) {
6125  SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
6126  SDValue(CFP01, 0),
6127  DAG.getConstantFP(2.0, VT));
6128  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6129  N0.getOperand(0), NewCFP);
6130  }
6131  }
6132 
      // Mirror image of the cases above, with the FMUL on the right-hand side.
6133  if (N1.getOpcode() == ISD::FMUL) {
6136 
6137  // (fadd x, (fmul c, x)) -> (fmul x, c+1)
6138  if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
6139  SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
6140  SDValue(CFP10, 0),
6141  DAG.getConstantFP(1.0, VT));
6142  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6143  N0, NewCFP);
6144  }
6145 
6146  // (fadd x, (fmul x, c)) -> (fmul x, c+1)
6147  if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
6148  SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
6149  SDValue(CFP11, 0),
6150  DAG.getConstantFP(1.0, VT));
6151  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6152  N0, NewCFP);
6153  }
6154 
6155 
6156  // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2)
6157  if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
6158  N0.getOperand(0) == N0.getOperand(1) &&
6159  N1.getOperand(1) == N0.getOperand(0)) {
6160  SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
6161  SDValue(CFP10, 0),
6162  DAG.getConstantFP(2.0, VT));
6163  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6164  N1.getOperand(1), NewCFP);
6165  }
6166 
6167  // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
6168  if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
6169  N0.getOperand(0) == N0.getOperand(1) &&
6170  N1.getOperand(0) == N0.getOperand(0)) {
6171  SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
6172  SDValue(CFP11, 0),
6173  DAG.getConstantFP(2.0, VT));
6174  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6175  N1.getOperand(0), NewCFP);
6176  }
6177  }
6178 
      // The folds below introduce a brand-new FP constant (3.0 / 4.0), so they
      // are additionally gated on AllowNewFpConst.
6179  if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
6181  // (fadd (fadd x, x), x) -> (fmul x, 3.0)
6182  if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
6183  (N0.getOperand(0) == N1))
6184  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6185  N1, DAG.getConstantFP(3.0, VT));
6186  }
6187 
6188  if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
6190  // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
6191  if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
6192  N1.getOperand(0) == N0)
6193  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6194  N0, DAG.getConstantFP(3.0, VT));
6195  }
6196 
6197  // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
6198  if (AllowNewFpConst &&
6199  N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
6200  N0.getOperand(0) == N0.getOperand(1) &&
6201  N1.getOperand(0) == N1.getOperand(1) &&
6202  N0.getOperand(0) == N1.getOperand(0))
6203  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6204  N0.getOperand(0),
6205  DAG.getConstantFP(4.0, VT));
6206  }
6207 
6208  // FADD -> FMA combines:
      // Requires fast FP-op fusion or unsafe math, a target that reports FMA as
      // faster than FMUL+FADD for VT, and (after operation legalization) a
      // legal or custom FMA.
6209  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
6210  DAG.getTarget().Options.UnsafeFPMath) &&
6211  DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
6212  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
6213 
6214  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6215  if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
6216  return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6217  N0.getOperand(0), N0.getOperand(1), N1);
6218 
6219  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6220  // Note: Commutes FADD operands.
6221  if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
6222  return DAG.getNode(ISD::FMA, SDLoc(N), VT,
6223  N1.getOperand(0), N1.getOperand(1), N0);
6224  }
6225 
6226  return SDValue();
6227 }
6228 
/// visitFSUB - Try to simplify the FSUB node N. Returns the replacement value
/// when one of the folds below applies, or an empty SDValue() when none does.
///
/// NOTE(review): N0CFP/N1CFP are declared on lines missing from this listing
/// (numbering skips 6232-6233); presumably they are N0/N1 viewed as
/// ConstantFPSDNode, null when the operand is not an FP constant -- confirm.
6229 SDValue DAGCombiner::visitFSUB(SDNode *N) {
6230  SDValue N0 = N->getOperand(0);
6231  SDValue N1 = N->getOperand(1);
6234  EVT VT = N->getValueType(0);
6235  SDLoc dl(N);
6236 
6237  // fold vector ops
6238  if (VT.isVector()) {
6239  SDValue FoldedVOp = SimplifyVBinOp(N);
6240  if (FoldedVOp.getNode()) return FoldedVOp;
6241  }
6242 
6243  // fold (fsub c1, c2) -> c1-c2
6244  if (N0CFP && N1CFP)
6245  return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
6246  // fold (fsub A, 0) -> A
6247  if (DAG.getTarget().Options.UnsafeFPMath &&
6248  N1CFP && N1CFP->getValueAPF().isZero())
6249  return N0;
6250  // fold (fsub 0, B) -> -B
      // Prefer an already-free negation of B; otherwise emit an explicit FNEG
      // when that is permitted at this stage.
6251  if (DAG.getTarget().Options.UnsafeFPMath &&
6252  N0CFP && N0CFP->getValueAPF().isZero()) {
6253  if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
6254  return GetNegatedExpression(N1, DAG, LegalOperations);
6255  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
6256  return DAG.getNode(ISD::FNEG, dl, VT, N1);
6257  }
6258  // fold (fsub A, (fneg B)) -> (fadd A, B)
6259  if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
6260  return DAG.getNode(ISD::FADD, dl, VT, N0,
6261  GetNegatedExpression(N1, DAG, LegalOperations));
6262 
6263  // If 'unsafe math' is enabled, fold
6264  // (fsub x, x) -> 0.0 &
6265  // (fsub x, (fadd x, y)) -> (fneg y) &
6266  // (fsub x, (fadd y, x)) -> (fneg y)
6267  if (DAG.getTarget().Options.UnsafeFPMath) {
6268  if (N0 == N1)
6269  return DAG.getConstantFP(0.0f, VT);
6270 
6271  if (N1.getOpcode() == ISD::FADD) {
6272  SDValue N10 = N1->getOperand(0);
6273  SDValue N11 = N1->getOperand(1);
6274 
      // Both variants only fire when the remaining addend can be negated
      // for free.
6275  if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
6276  &DAG.getTarget().Options))
6277  return GetNegatedExpression(N11, DAG, LegalOperations);
6278 
6279  if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
6280  &DAG.getTarget().Options))
6281  return GetNegatedExpression(N10, DAG, LegalOperations);
6282  }
6283  }
6284 
6285  // FSUB -> FMA combines:
      // Requires fast FP-op fusion or unsafe math, a target that reports FMA as
      // faster than FMUL+FADD for VT, and (after operation legalization) a
      // legal or custom FMA.
6286  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
6287  DAG.getTarget().Options.UnsafeFPMath) &&
6288  DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
6289  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
6290 
6291  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
6292  if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
6293  return DAG.getNode(ISD::FMA, dl, VT,
6294  N0.getOperand(0), N0.getOperand(1),
6295  DAG.getNode(ISD::FNEG, dl, VT, N1));
6296 
6297  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
6298  // Note: Commutes FSUB operands.
6299  if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
6300  return DAG.getNode(ISD::FMA, dl, VT,
6301  DAG.getNode(ISD::FNEG, dl, VT,
6302  N1.getOperand(0)),
6303  N1.getOperand(1), N0);
6304 
6305  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6306  if (N0.getOpcode() == ISD::FNEG &&
6307  N0.getOperand(0).getOpcode() == ISD::FMUL &&
6308  N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
6309  SDValue N00 = N0.getOperand(0).getOperand(0);
6310  SDValue N01 = N0.getOperand(0).getOperand(1);
6311  return DAG.getNode(ISD::FMA, dl, VT,
6312  DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
6313  DAG.getNode(ISD::FNEG, dl, VT, N1));
6314  }
6315  }
6316 
6317  return SDValue();
6318 }
6319 
/// visitFMUL - Try to simplify the FMUL node N. Returns the replacement value
/// when one of the folds below applies, or an empty SDValue() when none does.
///
/// NOTE(review): N0CFP/N1CFP are declared on lines missing from this listing
/// (numbering skips 6323-6324); presumably they are N0/N1 viewed as
/// ConstantFPSDNode, null when the operand is not an FP constant -- confirm.
6320 SDValue DAGCombiner::visitFMUL(SDNode *N) {
6321  SDValue N0 = N->getOperand(0);
6322  SDValue N1 = N->getOperand(1);
6325  EVT VT = N->getValueType(0);
      // NOTE(review): sibling visit* functions in this file use TLI without a
      // local declaration, so this local presumably shadows a class member --
      // confirm whether it is needed.
6326  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6327 
6328  // fold vector ops
6329  if (VT.isVector()) {
6330  SDValue FoldedVOp = SimplifyVBinOp(N);
6331  if (FoldedVOp.getNode()) return FoldedVOp;
6332  }
6333 
6334  // fold (fmul c1, c2) -> c1*c2
6335  if (N0CFP && N1CFP)
6336  return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
6337  // canonicalize constant to RHS
6338  if (N0CFP && !N1CFP)
6339  return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
6340  // fold (fmul A, 0) -> 0
6341  if (DAG.getTarget().Options.UnsafeFPMath &&
6342  N1CFP && N1CFP->getValueAPF().isZero())
6343  return N1;
6344  // fold (fmul A, 0) -> 0, vector edition.
      // NOTE(review): the second half of this condition (listing line 6346) is
      // missing from this listing; presumably it tests N1 for an all-zeros
      // vector -- confirm against the real source.
6345  if (DAG.getTarget().Options.UnsafeFPMath &&
6347  return N1;
6348  // fold (fmul A, 1.0) -> A
6349  if (N1CFP && N1CFP->isExactlyValue(1.0))
6350  return N0;
6351  // fold (fmul X, 2.0) -> (fadd X, X)
6352  if (N1CFP && N1CFP->isExactlyValue(+2.0))
6353  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
6354  // fold (fmul X, -1.0) -> (fneg X)
6355  if (N1CFP && N1CFP->isExactlyValue(-1.0))
6356  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
6357  return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
6358 
6359  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
6360  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
6361  &DAG.getTarget().Options)) {
6362  if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
6363  &DAG.getTarget().Options)) {
6364  // Both can be negated for free, check to see if at least one is cheaper
6365  // negated.
6366  if (LHSNeg == 2 || RHSNeg == 2)
6367  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6368  GetNegatedExpression(N0, DAG, LegalOperations),
6369  GetNegatedExpression(N1, DAG, LegalOperations));
6370  }
6371  }
6372 
6373  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
6374  if (DAG.getTarget().Options.UnsafeFPMath &&
6375  N1CFP && N0.getOpcode() == ISD::FMUL &&
6376  N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
6377  return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
6378  DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6379  N0.getOperand(1), N1));
6380 
6381  return SDValue();
6382 }
6383 
/// visitFMA - Try to simplify the FMA node N, whose operands are read here as
/// N0, N1 and N2 (the fold comments below treat it as N0 * N1 + N2). Returns
/// the replacement value when a fold applies, or an empty SDValue() otherwise.
///
/// NOTE(review): N0CFP/N1CFP are declared on lines missing from this listing
/// (numbering skips 6388-6389); presumably they are N0/N1 viewed as
/// ConstantFPSDNode, null when the operand is not an FP constant -- confirm.
6384 SDValue DAGCombiner::visitFMA(SDNode *N) {
6385  SDValue N0 = N->getOperand(0);
6386  SDValue N1 = N->getOperand(1);
6387  SDValue N2 = N->getOperand(2);
6390  EVT VT = N->getValueType(0);
6391  SDLoc dl(N);
6392 
      // With unsafe math, a zero multiplicand leaves just the addend.
6393  if (DAG.getTarget().Options.UnsafeFPMath) {
6394  if (N0CFP && N0CFP->isZero())
6395  return N2;
6396  if (N1CFP && N1CFP->isZero())
6397  return N2;
6398  }
      // A multiplicand of exactly 1.0 turns the FMA into a plain FADD.
6399  if (N0CFP && N0CFP->isExactlyValue(1.0))
6400  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
6401  if (N1CFP && N1CFP->isExactlyValue(1.0))
6402  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
6403 
6404  // Canonicalize (fma c, x, y) -> (fma x, c, y)
6405  if (N0CFP && !N1CFP)
6406  return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
6407 
6408  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
6409  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
6410  N2.getOpcode() == ISD::FMUL &&
6411  N0 == N2.getOperand(0) &&
6412  N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
6413  return DAG.getNode(ISD::FMUL, dl, VT, N0,
6414  DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
6415  }
6416 
6417 
6418  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
6419  if (DAG.getTarget().Options.UnsafeFPMath &&
6420  N0.getOpcode() == ISD::FMUL && N1CFP &&
6421  N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
6422  return DAG.getNode(ISD::FMA, dl, VT,
6423  N0.getOperand(0),
6424  DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
6425  N2);
6426  }
6427 
6428  // (fma x, 1, y) -> (fadd x, y)
6429  // (fma x, -1, y) -> (fadd (fneg x), y)
6430  if (N1CFP) {
      // NOTE(review): the same N1CFP == 1.0 test already returns at listing
      // line 6401 above, so this branch looks unreachable -- confirm and
      // consider removing it.
6431  if (N1CFP->isExactlyValue(1.0))
6432  return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
6433 
6434  if (N1CFP->isExactlyValue(-1.0) &&
6435  (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
6436  SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
6437  AddToWorkList(RHSNeg.getNode());
6438  return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
6439  }
6440  }
6441 
6442  // (fma x, c, x) -> (fmul x, (c+1))
6443  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2)
6444  return DAG.getNode(ISD::FMUL, dl, VT, N0,
6445  DAG.getNode(ISD::FADD, dl, VT,
6446  N1, DAG.getConstantFP(1.0, VT)));
6447 
6448  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
6449  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
6450  N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
6451  return DAG.getNode(ISD::FMUL, dl, VT, N0,
6452  DAG.getNode(ISD::FADD, dl, VT,
6453  N1, DAG.getConstantFP(-1.0, VT)));
6454 
6455 
6456  return SDValue();
6457 }
6458 
/// visitFDIV - Try to simplify the FDIV node N. Returns the replacement value
/// when one of the folds below applies, or an empty SDValue() when none does.
///
/// NOTE(review): N0CFP/N1CFP are declared on lines missing from this listing
/// (numbering skips 6462-6463); presumably they are N0/N1 viewed as
/// ConstantFPSDNode, null when the operand is not an FP constant -- confirm.
6459 SDValue DAGCombiner::visitFDIV(SDNode *N) {
6460  SDValue N0 = N->getOperand(0);
6461  SDValue N1 = N->getOperand(1);
6464  EVT VT = N->getValueType(0);
6465  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6466 
6467  // fold vector ops
6468  if (VT.isVector()) {
6469  SDValue FoldedVOp = SimplifyVBinOp(N);
6470  if (FoldedVOp.getNode()) return FoldedVOp;
6471  }
6472 
6473  // fold (fdiv c1, c2) -> c1/c2
6474  if (N0CFP && N1CFP)
6475  return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
6476 
6477  // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
6478  if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
6479  // Compute the reciprocal 1.0 / c2.
6480  APFloat N1APF = N1CFP->getValueAPF();
6481  APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      // NOTE(review): the statement that divides Recip by N1APF and defines
      // the status 'st' (listing line 6482) is missing from this listing, as
      // is line 6490 inside the condition below -- confirm in the real source.
6483  // Only do the transform if the reciprocal is a legal fp immediate that
6484  // isn't too nasty (eg NaN, denormal, ...).
6485  if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
6486  (!LegalOperations ||
6487  // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
6488  // backend)... we should handle this gracefully after Legalize.
6489  // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
6491  TLI.isFPImmLegal(Recip, VT)))
6492  return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
6493  DAG.getConstantFP(Recip, VT));
6494  }
6495 
6496  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
6497  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
6498  &DAG.getTarget().Options)) {
6499  if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
6500  &DAG.getTarget().Options)) {
6501  // Both can be negated for free, check to see if at least one is cheaper
6502  // negated.
6503  if (LHSNeg == 2 || RHSNeg == 2)
6504  return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
6505  GetNegatedExpression(N0, DAG, LegalOperations),
6506  GetNegatedExpression(N1, DAG, LegalOperations));
6507  }
6508  }
6509 
6510  return SDValue();
6511 }
6512 
/// visitFREM - Try to simplify the FREM node N. The only fold here re-requests
/// the node when both operands are FP constants; otherwise an empty SDValue()
/// is returned.
///
/// NOTE(review): N0CFP/N1CFP are declared on lines missing from this listing
/// (numbering skips 6516-6517); presumably they are N0/N1 viewed as
/// ConstantFPSDNode, null when the operand is not an FP constant -- confirm.
6513 SDValue DAGCombiner::visitFREM(SDNode *N) {
6514  SDValue N0 = N->getOperand(0);
6515  SDValue N1 = N->getOperand(1);
6518  EVT VT = N->getValueType(0);
6519 
6520  // fold (frem c1, c2) -> fmod(c1,c2)
6521  if (N0CFP && N1CFP)
6522  return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
6523 
6524  return SDValue();
6525 }
6526 
/// visitFCOPYSIGN - Try to simplify the FCOPYSIGN node N, where N0 supplies
/// the magnitude and N1 the sign (per the fold comments below). Returns the
/// replacement value when a fold applies, or an empty SDValue() otherwise.
///
/// NOTE(review): N0CFP/N1CFP are declared on lines missing from this listing
/// (numbering skips 6530-6531); presumably they are N0/N1 viewed as
/// ConstantFPSDNode, null when the operand is not an FP constant -- confirm.
6527 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
6528  SDValue N0 = N->getOperand(0);
6529  SDValue N1 = N->getOperand(1);
6532  EVT VT = N->getValueType(0);
6533 
6534  if (N0CFP && N1CFP) // Constant fold
6535  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
6536 
6537  if (N1CFP) {
6538  const APFloat& V = N1CFP->getValueAPF();
6539  // copysign(x, c1) -> fabs(x) iff ispos(c1)
6540  // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
6541  if (!V.isNegative()) {
6542  if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
6543  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
6544  } else {
      // NOTE(review): only FNEG legality is checked here although an FABS
      // node is created as well -- confirm this is intended.
6545  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
6546  return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
6547  DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
6548  }
6549  }
6550 
6551  // copysign(fabs(x), y) -> copysign(x, y)
6552  // copysign(fneg(x), y) -> copysign(x, y)
6553  // copysign(copysign(x,z), y) -> copysign(x, y)
6554  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
6555  N0.getOpcode() == ISD::FCOPYSIGN)
6556  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
6557  N0.getOperand(0), N1);
6558 
6559  // copysign(x, abs(y)) -> abs(x)
6560  if (N1.getOpcode() == ISD::FABS)
6561  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
6562 
6563  // copysign(x, copysign(y,z)) -> copysign(x, z)
6564  if (N1.getOpcode() == ISD::FCOPYSIGN)
6565  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
6566  N0, N1.getOperand(1));
6567 
6568  // copysign(x, fp_extend(y)) -> copysign(x, y)
6569  // copysign(x, fp_round(y)) -> copysign(x, y)
6570  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
6571  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
6572  N0, N1.getOperand(0));
6573 
6574  return SDValue();
6575 }
6576 
/// visitSINT_TO_FP - Try to simplify the SINT_TO_FP node N. Returns the
/// replacement value when a fold applies, or an empty SDValue() otherwise.
///
/// NOTE(review): several lines of this function are missing from this listing
/// (numbering skips 6579, 6587, 6593, 6604, 6609 and 6622): the declaration
/// of N0C, the tail of a number of 'if' conditions, and the 'if' that opens
/// the SELECT_CC section closed at line 6629. Read the real source before
/// changing any of the conditions below.
6577 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
6578  SDValue N0 = N->getOperand(0);
6580  EVT VT = N->getValueType(0);
6581  EVT OpVT = N0.getValueType();
6582 
6583  // fold (sint_to_fp c1) -> c1fp
6584  if (N0C &&
6585  // ...but only if the target supports immediate floating-point values
6586  (!LegalOperations ||
6588  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
6589 
6590  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
6591  // but UINT_TO_FP is legal on this target, try to convert.
6592  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
6594  // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
6595  if (DAG.SignBitIsZero(N0))
6596  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
6597  }
6598 
6599  // The next optimizations are desirable only if SELECT_CC can be lowered.
6600  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
6601  // having to say they don't support SELECT_CC on every type the DAG knows
6602  // about, since there is no way to mark an opcode illegal at all value types
6603  // (See also visitSELECT)
6605  // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
      // Restricted to an i1 setcc and a scalar result type.
6606  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
6607  !VT.isVector() &&
6608  (!LegalOperations ||
6610  SDValue Ops[] =
6611  { N0.getOperand(0), N0.getOperand(1),
6612  DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
6613  N0.getOperand(2) };
6614  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
6615  }
6616 
6617  // fold (sint_to_fp (zext (setcc x, y, cc))) ->
6618  // (select_cc x, y, 1.0, 0.0, cc)
6619  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
6620  N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
6621  (!LegalOperations ||
6623  SDValue Ops[] =
6624  { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
6625  DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
6626  N0.getOperand(0).getOperand(2) };
6627  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
6628  }
6629  }
6630 
6631  return SDValue();
6632 }
6633 
/// visitUINT_TO_FP - Try to simplify the UINT_TO_FP node N. Returns the
/// replacement value when a fold applies, or an empty SDValue() otherwise.
///
/// NOTE(review): several lines of this function are missing from this listing
/// (numbering skips 6636, 6644, 6650, 6661 and 6666): the declaration of N0C,
/// the tail of a number of 'if' conditions, and the 'if' that opens the
/// SELECT_CC section closed at line 6673. Read the real source before
/// changing any of the conditions below.
6634 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
6635  SDValue N0 = N->getOperand(0);
6637  EVT VT = N->getValueType(0);
6638  EVT OpVT = N0.getValueType();
6639 
6640  // fold (uint_to_fp c1) -> c1fp
6641  if (N0C &&
6642  // ...but only if the target supports immediate floating-point values
6643  (!LegalOperations ||
6645  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
6646 
6647  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
6648  // but SINT_TO_FP is legal on this target, try to convert.
6649  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
6651  // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
6652  if (DAG.SignBitIsZero(N0))
6653  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
6654  }
6655 
6656  // The next optimizations are desirable only if SELECT_CC can be lowered.
6657  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
6658  // having to say they don't support SELECT_CC on every type the DAG knows
6659  // about, since there is no way to mark an opcode illegal at all value types
6660  // (See also visitSELECT)
6662  // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
6663 
6664  if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
6665  (!LegalOperations ||
6667  SDValue Ops[] =
6668  { N0.getOperand(0), N0.getOperand(1),
6669  DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
6670  N0.getOperand(2) };
6671  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
6672  }
6673  }
6674 
6675  return SDValue();
6676 }
6677 
/// visitFP_TO_SINT - Try to simplify the FP_TO_SINT node N. The only fold here
/// re-requests the node when N0CFP is non-null; otherwise an empty SDValue()
/// is returned.
///
/// NOTE(review): N0CFP is declared on a line missing from this listing
/// (numbering skips 6680); presumably it is N0 viewed as ConstantFPSDNode,
/// null when the operand is not an FP constant -- confirm.
6678 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
6679  SDValue N0 = N->getOperand(0);
6681  EVT VT = N->getValueType(0);
6682 
6683  // fold (fp_to_sint c1fp) -> c1
6684  if (N0CFP)
6685  return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
6686 
6687  return SDValue();
6688 }
6689 
/// visitFP_TO_UINT - Try to simplify the FP_TO_UINT node N. The only fold here
/// re-requests the node when N0CFP is non-null; otherwise an empty SDValue()
/// is returned.
///
/// NOTE(review): N0CFP is declared on a line missing from this listing
/// (numbering skips 6692); presumably it is N0 viewed as ConstantFPSDNode,
/// null when the operand is not an FP constant -- confirm.
6690 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
6691  SDValue N0 = N->getOperand(0);
6693  EVT VT = N->getValueType(0);
6694 
6695  // fold (fp_to_uint c1fp) -> c1
6696  if (N0CFP)
6697  return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
6698 
6699  return SDValue();
6700 }
6701 
/// visitFP_ROUND - Try to simplify the FP_ROUND node N. N0 is the value being
/// rounded; N1 is the node's second operand, which the code below reads as a
/// constant flag (1 meaning the round is value preserving, per the comment at
/// line 6718). Returns the replacement value when a fold applies, or an empty
/// SDValue() otherwise.
///
/// NOTE(review): N0CFP is declared on a line missing from this listing
/// (numbering skips 6705); presumably it is N0 viewed as ConstantFPSDNode,
/// null when the operand is not an FP constant -- confirm.
6702 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
6703  SDValue N0 = N->getOperand(0);
6704  SDValue N1 = N->getOperand(1);
6706  EVT VT = N->getValueType(0);
6707 
6708  // fold (fp_round c1fp) -> c1fp
6709  if (N0CFP)
6710  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
6711 
6712  // fold (fp_round (fp_extend x)) -> x
      // Only when x already has the result type VT.
6713  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
6714  return N0.getOperand(0);
6715 
6716  // fold (fp_round (fp_round x)) -> (fp_round x)
6717  if (N0.getOpcode() == ISD::FP_ROUND) {
6718  // This is a value preserving truncation if both round's are.
6719  bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
6720  N0.getNode()->getConstantOperandVal(1) == 1;
6721  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
6722  DAG.getIntPtrConstant(IsTrunc));
6723  }
6724 
6725  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
6726  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
6727  SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
6728  N0.getOperand(0), N1);
      // Queue the new inner round so the combiner visits it too.
6729  AddToWorkList(Tmp.getNode());
6730  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
6731  Tmp, N0.getOperand(1));
6732  }
6733 
6734  return SDValue();
6735 }
6736 
6737 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
6738  SDValue N0 = N->getOperand(0);
6739  EVT VT = N->getValueType(0);
6740  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6742 
6743  // fold (fp_round_inreg c1fp) -> c1fp
6744  if (N0CFP && isTypeLegal(EVT)) {
6745  SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
6746  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
6747  }
6748 
6749  return SDValue();
6750 }
6751 
6752 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
6753  SDValue N0 = N->getOperand(0);
6755  EVT VT = N->getValueType(0);
6756 
6757  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
6758  if (N->hasOneUse() &&
6759  N->use_begin()->getOpcode() == ISD::FP_ROUND)
6760  return SDValue();
6761 
6762  // fold (fp_extend c1fp) -> c1fp
6763  if (N0CFP)
6764  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
6765 
6766  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
6767  // value of X.
6768  if (N0.getOpcode() == ISD::FP_ROUND
6769  && N0.getNode()->getConstantOperandVal(1) == 1) {
6770  SDValue In = N0.getOperand(0);
6771  if (In.getValueType() == VT) return In;
6772  if (VT.bitsLT(In.getValueType()))
6773  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
6774  In, N0.getOperand(1));
6775  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
6776  }
6777 
6778  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
6779  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
6780  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
6782  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6783  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
6784  LN0->getChain(),
6785  LN0->getBasePtr(), N0.getValueType(),
6786  LN0->getMemOperand());
6787  CombineTo(N, ExtLoad);
6788  CombineTo(N0.getNode(),
6789  DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
6790  N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
6791  ExtLoad.getValue(1));
6792  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6793  }
6794 
6795  return SDValue();
6796 }
6797 
6798 SDValue DAGCombiner::visitFNEG(SDNode *N) {
6799  SDValue N0 = N->getOperand(0);
6800  EVT VT = N->getValueType(0);
6801 
6802  if (VT.isVector()) {
6803  SDValue FoldedVOp = SimplifyVUnaryOp(N);
6804  if (FoldedVOp.getNode()) return FoldedVOp;
6805  }
6806 
6807  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
6808  &DAG.getTarget().Options))
6809  return GetNegatedExpression(N0, DAG, LegalOperations);
6810 
6811  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
6812  // constant pool values.
6813  if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
6814  !VT.isVector() &&
6815  N0.getNode()->hasOneUse() &&
6816  N0.getOperand(0).getValueType().isInteger()) {
6817  SDValue Int = N0.getOperand(0);
6818  EVT IntVT = Int.getValueType();
6819  if (IntVT.isInteger() && !IntVT.isVector()) {
6820  Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
6821  DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
6822  AddToWorkList(Int.getNode());
6823  return DAG.getNode(ISD::BITCAST, SDLoc(N),
6824  VT, Int);
6825  }
6826  }
6827 
6828  // (fneg (fmul c, x)) -> (fmul -c, x)
6829  if (N0.getOpcode() == ISD::FMUL) {
6831  if (CFP1)
6832  return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
6833  N0.getOperand(0),
6834  DAG.getNode(ISD::FNEG, SDLoc(N), VT,
6835  N0.getOperand(1)));
6836  }
6837 
6838  return SDValue();
6839 }
6840 
6841 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
6842  SDValue N0 = N->getOperand(0);
6844  EVT VT = N->getValueType(0);
6845 
6846  // fold (fceil c1) -> fceil(c1)
6847  if (N0CFP)
6848  return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
6849 
6850  return SDValue();
6851 }
6852 
6853 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
6854  SDValue N0 = N->getOperand(0);
6856  EVT VT = N->getValueType(0);
6857 
6858  // fold (ftrunc c1) -> ftrunc(c1)
6859  if (N0CFP)
6860  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
6861 
6862  return SDValue();
6863 }
6864 
6865 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
6866  SDValue N0 = N->getOperand(0);
6868  EVT VT = N->getValueType(0);
6869 
6870  // fold (ffloor c1) -> ffloor(c1)
6871  if (N0CFP)
6872  return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
6873 
6874  return SDValue();
6875 }
6876 
6877 SDValue DAGCombiner::visitFABS(SDNode *N) {
6878  SDValue N0 = N->getOperand(0);
6880  EVT VT = N->getValueType(0);
6881 
6882  if (VT.isVector()) {
6883  SDValue FoldedVOp = SimplifyVUnaryOp(N);
6884  if (FoldedVOp.getNode()) return FoldedVOp;
6885  }
6886 
6887  // fold (fabs c1) -> fabs(c1)
6888  if (N0CFP)
6889  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
6890  // fold (fabs (fabs x)) -> (fabs x)
6891  if (N0.getOpcode() == ISD::FABS)
6892  return N->getOperand(0);
6893  // fold (fabs (fneg x)) -> (fabs x)
6894  // fold (fabs (fcopysign x, y)) -> (fabs x)
6895  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
6896  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
6897 
6898  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
6899  // constant pool values.
6900  if (!TLI.isFAbsFree(VT) &&
6901  N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
6902  N0.getOperand(0).getValueType().isInteger() &&
6903  !N0.getOperand(0).getValueType().isVector()) {
6904  SDValue Int = N0.getOperand(0);
6905  EVT IntVT = Int.getValueType();
6906  if (IntVT.isInteger() && !IntVT.isVector()) {
6907  Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
6908  DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
6909  AddToWorkList(Int.getNode());
6910  return DAG.getNode(ISD::BITCAST, SDLoc(N),
6911  N->getValueType(0), Int);
6912  }
6913  }
6914 
6915  return SDValue();
6916 }
6917 
/// visitBRCOND - Combine a BRCOND node.
///
/// Operand 0 is the chain, operand 1 the branch condition, and operand 2 is
/// forwarded unchanged as the last operand of every branch built here.
/// Rewrites tried, in order:
///   1. brcond (setcc ...)           -> BR_CC
///   2. brcond (srl (and x, 2^k), k) -> brcond (setcc ne (and x, 2^k), 0),
///      optionally looking through a single-use TRUNCATE
///   3. brcond (xor x, y)            -> brcond (setcc x, y)
///
/// \returns the replacement node, SDValue(N, 0) when the DAG was already
/// updated in place, or an empty SDValue when nothing matched.
6918 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
6919  SDValue Chain = N->getOperand(0);
6920  SDValue N1 = N->getOperand(1);
6921  SDValue N2 = N->getOperand(2);
6922
6923  // If N is a constant we could fold this into a fallthrough or unconditional
6924  // branch. However that doesn't happen very often in normal code, because
6925  // Instcombine/SimplifyCFG should have handled the available opportunities.
6926  // If we did this folding here, it would be necessary to update the
6927  // MachineBasicBlock CFG, which is awkward.
6928
6929  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
6930  // on the target.
6931  if (N1.getOpcode() == ISD::SETCC &&
  // NOTE(review): the embedded listing numbers jump from 6931 to 6933, so one
  // source line (the start of the legality test whose argument list closes on
  // the next line) is missing from this copy -- restore it from upstream.
6933  N1.getOperand(0).getValueType())) {
6934  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
6935  Chain, N1.getOperand(2),
6936  N1.getOperand(0), N1.getOperand(1), N2);
6937  }
6938
  // Condition is a single-use SRL, or a single-use TRUNCATE of a single-use
  // SRL.
6939  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
6940  ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
6941  (N1.getOperand(0).hasOneUse() &&
6942  N1.getOperand(0).getOpcode() == ISD::SRL))) {
6943  SDNode *Trunc = 0;
6944  if (N1.getOpcode() == ISD::TRUNCATE) {
6945  // Look past the truncate; from here on N1 refers to the SRL.
6946  Trunc = N1.getNode();
6947  N1 = N1.getOperand(0);
6948  }
6949
6950  // Match this pattern so that we can generate simpler code:
6951  //
6952  // %a = ...
6953  // %b = and i32 %a, 2
6954  // %c = srl i32 %b, 1
6955  // brcond i32 %c ...
6956  //
6957  // into
6958  //
6959  // %a = ...
6960  // %b = and i32 %a, 2
6961  // %c = setcc eq %b, 0
6962  // brcond %c ...
6963  //
6964  // This applies only when the AND constant value has one bit set and the
6965  // SRL constant is equal to the log2 of the AND constant. The back-end is
6966  // smart enough to convert the result into a TEST/JMP sequence.
  // NOTE(review): the example above says "setcc eq", but the code below builds
  // the comparison with ISD::SETNE.
6967  SDValue Op0 = N1.getOperand(0);
6968  SDValue Op1 = N1.getOperand(1);
6969
6970  if (Op0.getOpcode() == ISD::AND &&
6971  Op1.getOpcode() == ISD::Constant) {
6972  SDValue AndOp1 = Op0.getOperand(1);
6973
6974  if (AndOp1.getOpcode() == ISD::Constant) {
6975  const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
6976
6977  if (AndConst.isPowerOf2() &&
6978  cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
6979  SDValue SetCC =
6980  DAG.getSetCC(SDLoc(N),
  // NOTE(review): the embedded listing numbers jump from 6980 to 6982, so one
  // argument line of this getSetCC call (presumably the result type) is
  // missing from this copy -- restore it from upstream.
6982  Op0, DAG.getConstant(0, Op0.getValueType()),
6983  ISD::SETNE);
6984
6985  SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
6986  MVT::Other, Chain, SetCC, N2);
6987  // Don't add the new BRCond into the worklist or else SimplifySelectCC
6988  // will convert it back to (X & C1) >> C2.
6989  CombineTo(N, NewBRCond, false);
6990  // Truncate is dead.
6991  if (Trunc) {
6992  removeFromWorkList(Trunc);
6993  DAG.DeleteNode(Trunc);
6994  }
6995  // Replace the uses of SRL with SETCC
6996  WorkListRemover DeadNodes(*this);
6997  DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
6998  removeFromWorkList(N1.getNode());
6999  DAG.DeleteNode(N1.getNode());
7000  return SDValue(N, 0); // Return N so it doesn't get rechecked!
7001  }
7002  }
7003  }
7004
7005  if (Trunc)
7006  // Restore N1 if the above transformation doesn't match.
7007  N1 = N->getOperand(1);
7008  }
7009
7010  // Transform br(xor(x, y)) -> br(x != y)
7011  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
7012  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
7013  SDNode *TheXor = N1.getNode();
7014  SDValue Op0 = TheXor->getOperand(0);
7015  SDValue Op1 = TheXor->getOperand(1);
7016  if (Op0.getOpcode() == Op1.getOpcode()) {
7017  // Avoid missing important xor optimizations.
7018  SDValue Tmp = visitXOR(TheXor);
7019  if (Tmp.getNode()) {
7020  if (Tmp.getNode() != TheXor) {
7021  DEBUG(dbgs() << "\nReplacing.8 ";
7022  TheXor->dump(&DAG);
7023  dbgs() << "\nWith: ";
7024  Tmp.getNode()->dump(&DAG);
7025  dbgs() << '\n');
7026  WorkListRemover DeadNodes(*this);
7027  DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
7028  removeFromWorkList(TheXor);
7029  DAG.DeleteNode(TheXor);
7030  return DAG.getNode(ISD::BRCOND, SDLoc(N),
7031  MVT::Other, Chain, Tmp, N2);
7032  }
7033
7034  // visitXOR has changed XOR's operands or replaced the XOR completely,
7035  // bail out.
7036  return SDValue(N, 0);
7037  }
7038  }
7039
7040  if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
7041  bool Equal = false;
  // NOTE(review): Op0 is required to be a ConstantSDNode by the dyn_cast and
  // to have opcode ISD::XOR by the inner test; those look mutually exclusive,
  // so Equal appears to stay false and the SETEQ form unreachable -- confirm
  // which operand was meant to be the constant.
7042  if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
7043  if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
7044  Op0.getOpcode() == ISD::XOR) {
7045  TheXor = Op0.getNode();
7046  Equal = true;
7047  }
7048
7049  EVT SetCCVT = N1.getValueType();
7050  if (LegalTypes)
7051  SetCCVT = getSetCCResultType(SetCCVT);
7052  SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
7053  SetCCVT,
7054  Op0, Op1,
7055  Equal ? ISD::SETEQ : ISD::SETNE);
7056  // Replace the uses of XOR with SETCC
7057  WorkListRemover DeadNodes(*this);
7058  DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
7059  removeFromWorkList(N1.getNode());
7060  DAG.DeleteNode(N1.getNode());
7061  return DAG.getNode(ISD::BRCOND, SDLoc(N),
7062  MVT::Other, Chain, SetCC, N2);
7063  }
7064  }
7065
7066  return SDValue();
7067 }
7068 
7069 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
7070 //
7071 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
7072  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
7073  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
7074 
7075  // If N is a constant we could fold this into a fallthrough or unconditional
7076  // branch. However that doesn't happen very often in normal code, because
7077  // Instcombine/SimplifyCFG should have handled the available opportunities.
7078  // If we did this folding here, it would be necessary to update the
7079  // MachineBasicBlock CFG, which is awkward.
7080 
7081  // Use SimplifySetCC to simplify SETCC's.
7082  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
7083  CondLHS, CondRHS, CC->get(), SDLoc(N),
7084  false);
7085  if (Simp.getNode()) AddToWorkList(Simp.getNode());
7086 
7087  // fold to a simpler setcc
7088  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
7089  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
7090  N->getOperand(0), Simp.getOperand(2),
7091  Simp.getOperand(0), Simp.getOperand(1),
7092  N->getOperand(4));
7093 
7094  return SDValue();
7095 }
7096 
7097 /// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
7098 /// uses N as its base pointer and that N may be folded in the load / store
7099 /// addressing mode.
7101  SelectionDAG &DAG,
7102  const TargetLowering &TLI) {
7103  EVT VT;
7104  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
7105  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
7106  return false;
7107  VT = Use->getValueType(0);
7108  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
7109  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
7110  return false;
7111  VT = ST->getValue().getValueType();
7112  } else
7113  return false;
7114 
7116  if (N->getOpcode() == ISD::ADD) {
7118  if (Offset)
7119  // [reg +/- imm]
7120  AM.BaseOffs = Offset->getSExtValue();
7121  else
7122  // [reg +/- reg]
7123  AM.Scale = 1;
7124  } else if (N->getOpcode() == ISD::SUB) {
7126  if (Offset)
7127  // [reg +/- imm]
7128  AM.BaseOffs = -Offset->getSExtValue();
7129  else
7130  // [reg +/- reg]
7131  AM.Scale = 1;
7132  } else
7133  return false;
7134 
7135  return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
7136 }
7137 
7138 /// CombineToPreIndexedLoadStore - Try turning a load / store into a
7139 /// pre-indexed load / store when the base pointer is an add or subtract
7140 /// and it has other uses besides the load / store. After the
7141 /// transformation, the new indexed load / store has effectively folded
7142 /// the add / subtract in and all of its other uses are redirected to the
7143 /// new load / store.
///
/// Nothing is attempted while Level is below AfterLegalizeDAG.
///
/// \param N the load or store to transform; it is deleted on success.
/// \returns true if N was replaced by an indexed load / store, false if the
/// DAG was left untouched.
7144 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
7145  if (Level < AfterLegalizeDAG)
7146  return false;
7147
7148  bool isLoad = true;
7149  SDValue Ptr;
7150  EVT VT;
7151  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7152  if (LD->isIndexed())
7153  return false;
7154  VT = LD->getMemoryVT();
7155  if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
7156  !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
7157  return false;
7158  Ptr = LD->getBasePtr();
7159  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
7160  if (ST->isIndexed())
7161  return false;
7162  VT = ST->getMemoryVT();
7163  if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
  // NOTE(review): the embedded listing numbers jump from 7163 to 7165, so the
  // second operand of the && above (presumably the PRE_DEC store check,
  // mirroring the load case) is missing from this copy -- restore it.
7165  return false;
7166  Ptr = ST->getBasePtr();
7167  isLoad = false;
7168  } else {
7169  return false;
7170  }
7171
7172  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
7173  // out. There is no reason to make this a preinc/predec.
7174  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
7175  Ptr.getNode()->hasOneUse())
7176  return false;
7177
7178  // Ask the target to do addressing mode selection.
7179  SDValue BasePtr;
7180  SDValue Offset;
  // NOTE(review): the embedded listing numbers jump from 7180 to 7182; the
  // declaration of AM, which is used on the next line, is missing from this
  // copy -- restore it from upstream.
7182  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
7183  return false;
7184
7185  // Backends without true r+i pre-indexed forms may need to pass a
7186  // constant base with a variable offset so that constant coercion
7187  // will work with the patterns in canonical form.
7188  bool Swapped = false;
7189  if (isa<ConstantSDNode>(BasePtr)) {
7190  std::swap(BasePtr, Offset);
7191  Swapped = true;
7192  }
7193
7194  // Don't create an indexed load / store with zero offset.
7195  if (isa<ConstantSDNode>(Offset) &&
7196  cast<ConstantSDNode>(Offset)->isNullValue())
7197  return false;
7198
7199  // Try turning it into a pre-indexed load / store except when:
7200  // 1) The new base ptr is a frame index.
7201  // 2) If N is a store and the new base ptr is either the same as or is a
7202  // predecessor of the value being stored.
7203  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
7204  // that would create a cycle.
7205  // 4) All uses are load / store ops that use it as old base ptr.
7206
7207  // Check #1. Preinc'ing a frame index would require copying the stack pointer
7208  // (plus the implicit offset) to a register to preinc anyway.
7209  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
7210  return false;
7211
7212  // Check #2.
7213  if (!isLoad) {
7214  SDValue Val = cast<StoreSDNode>(N)->getValue();
7215  if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
7216  return false;
7217  }
7218
7219  // If the offset is a constant, there may be other adds of constants that
7220  // can be folded with this one. We should do this to avoid having to keep
7221  // a copy of the original base pointer.
  // OtherUses collects ADD/SUB-by-constant users of BasePtr; it is cleared as
  // soon as any other kind of user is found.
7222  SmallVector<SDNode *, 16> OtherUses;
7223  if (isa<ConstantSDNode>(Offset))
7224  for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
7225  E = BasePtr.getNode()->use_end(); I != E; ++I) {
7226  SDNode *Use = *I;
7227  if (Use == Ptr.getNode())
7228  continue;
7229
7230  if (Use->isPredecessorOf(N))
7231  continue;
7232
7233  if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
7234  OtherUses.clear();
7235  break;
7236  }
7237
7238  SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
7239  if (Op1.getNode() == BasePtr.getNode())
7240  std::swap(Op0, Op1);
7241  assert(Op0.getNode() == BasePtr.getNode() &&
7242  "Use of ADD/SUB but not an operand");
7243
7244  if (!isa<ConstantSDNode>(Op1)) {
7245  OtherUses.clear();
7246  break;
7247  }
7248
7249  // FIXME: In some cases, we can be smarter about this.
7250  if (Op1.getValueType() != Offset.getValueType()) {
7251  OtherUses.clear();
7252  break;
7253  }
7254
7255  OtherUses.push_back(Use);
7256  }
7257
  // Undo the canonicalising swap so BasePtr/Offset are again in the order the
  // target returned them.
7258  if (Swapped)
7259  std::swap(BasePtr, Offset);
7260
7261  // Now check for #3 and #4.
7262  bool RealUse = false;
7263
7264  // Caches for hasPredecessorHelper
  // NOTE(review): the embedded listing numbers jump from 7264 to 7267; the
  // declarations of Visited and Worklist, used in the loop below, are missing
  // from this copy -- restore them from upstream.
7267
7268  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
7269  E = Ptr.getNode()->use_end(); I != E; ++I) {
7270  SDNode *Use = *I;
7271  if (Use == N)
7272  continue;
7273  if (N->hasPredecessorHelper(Use, Visited, Worklist))
7274  return false;
7275
7276  // If Ptr may be folded in addressing mode of other use, then it's
7277  // not profitable to do this transformation.
7278  if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
7279  RealUse = true;
7280  }
7281
7282  if (!RealUse)
7283  return false;
7284
7285  SDValue Result;
7286  if (isLoad)
7287  Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
7288  BasePtr, Offset, AM);
7289  else
7290  Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
7291  BasePtr, Offset, AM);
7292  ++PreIndexedNodes;
7293  ++NodesCombined;
7294  DEBUG(dbgs() << "\nReplacing.4 ";
7295  N->dump(&DAG);
7296  dbgs() << "\nWith: ";
7297  Result.getNode()->dump(&DAG);
7298  dbgs() << '\n');
7299  WorkListRemover DeadNodes(*this);
  // Result numbering as used in this function: an indexed load's value 0
  // replaces the loaded value and value 2 the chain; an indexed store's
  // value 1 replaces the chain. The remaining result (load value 1, store
  // value 0) replaces Ptr further down.
7300  if (isLoad) {
7301  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
7302  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
7303  } else {
7304  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
7305  }
7306
7307  // Finally, since the node is now dead, remove it from the graph.
7308  DAG.DeleteNode(N);
7309
  // Swap back into canonical (non-constant base, constant offset) order for
  // the OtherUses rewrite below.
7310  if (Swapped)
7311  std::swap(BasePtr, Offset);
7312
7313  // Replace other uses of BasePtr that can be updated to use Ptr
7314  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
7315  unsigned OffsetIdx = 1;
7316  if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
7317  OffsetIdx = 0;
7318  assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
7319  BasePtr.getNode() && "Expected BasePtr operand");
7320
7321  // We need to replace ptr0 in the following expression:
7322  // x0 * offset0 + y0 * ptr0 = t0
7323  // knowing that
7324  // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
7325  //
7326  // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
7327  // indexed load/store and the expression that needs to be re-written.
7328  //
7329  // Therefore, we have:
7330  // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
7331
7332  ConstantSDNode *CN =
7333  cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
7334  int X0, X1, Y0, Y1;
7335  APInt Offset0 = CN->getAPIntValue();
7336  APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
7337
7338  X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
7339  Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
7340  X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
7341  Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
7342
7343  unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
7344
7345  APInt CNV = Offset0;
7346  if (X0 < 0) CNV = -CNV;
7347  if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
7348  else CNV = CNV - Offset1;
7349
7350  // We can now generate the new expression.
7351  SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
7352  SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
7353
7354  SDValue NewUse = DAG.getNode(Opcode,
7355  SDLoc(OtherUses[i]),
7356  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
7357  DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
7358  removeFromWorkList(OtherUses[i]);
7359  DAG.DeleteNode(OtherUses[i]);
7360  }
7361
7362  // Replace the uses of Ptr with uses of the updated base value.
7363  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
7364  removeFromWorkList(Ptr.getNode());
7365  DAG.DeleteNode(Ptr.getNode());
7366
7367  return true;
7368 }
7369 
7370 /// CombineToPostIndexedLoadStore - Try to combine a load / store with a
7371 /// add / sub of the base pointer node into a post-indexed load / store.
7372 /// The transformation folded the add / subtract into the new indexed
7373 /// load / store effectively and all of its uses are redirected to the
7374 /// new load / store.
///
/// Nothing is attempted while Level is below AfterLegalizeDAG. Each ADD/SUB
/// user of the base pointer is offered to the target in turn; the first one
/// that passes every check is folded.
///
/// \param N the load or store to transform; it is deleted on success.
/// \returns true if N was replaced by an indexed load / store, false if the
/// DAG was left untouched.
7375 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
7376  if (Level < AfterLegalizeDAG)
7377  return false;
7378
7379  bool isLoad = true;
7380  SDValue Ptr;
7381  EVT VT;
7382  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7383  if (LD->isIndexed())
7384  return false;
7385  VT = LD->getMemoryVT();
7386  if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
  // NOTE(review): the embedded listing numbers jump from 7386 to 7388, so the
  // second operand of the && above (presumably the POST_DEC load check) is
  // missing from this copy -- restore it from upstream.
7388  return false;
7389  Ptr = LD->getBasePtr();
7390  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
7391  if (ST->isIndexed())
7392  return false;
7393  VT = ST->getMemoryVT();
7394  if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
  // NOTE(review): the embedded listing numbers jump from 7394 to 7396, so the
  // second operand of the && above (presumably the POST_DEC store check) is
  // missing from this copy -- restore it from upstream.
7396  return false;
7397  Ptr = ST->getBasePtr();
7398  isLoad = false;
7399  } else {
7400  return false;
7401  }
7402
  // With N as the only user of Ptr there is no add/sub to fold.
7403  if (Ptr.getNode()->hasOneUse())
7404  return false;
7405
7406  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
7407  E = Ptr.getNode()->use_end(); I != E; ++I) {
7408  SDNode *Op = *I;
7409  if (Op == N ||
7410  (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
7411  continue;
7412
7413  SDValue BasePtr;
7414  SDValue Offset;
  // NOTE(review): the embedded listing numbers jump from 7414 to 7416; the
  // declaration of AM, which is used on the next line, is missing from this
  // copy -- restore it from upstream.
7416  if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
7417  // Don't create an indexed load / store with zero offset.
7418  if (isa<ConstantSDNode>(Offset) &&
7419  cast<ConstantSDNode>(Offset)->isNullValue())
7420  continue;
7421
7422  // Try turning it into a post-indexed load / store except when
7423  // 1) All uses are load / store ops that use it as base ptr (and
7424  // it may be folded as addressing mode).
7425  // 2) Op must be independent of N, i.e. Op is neither a predecessor
7426  // nor a successor of N. Otherwise, if Op is folded that would
7427  // create a cycle.
7428
7429  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
7430  continue;
7431
7432  // Check for #1.
7433  bool TryNext = false;
7434  for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
7435  EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
7436  SDNode *Use = *II;
7437  if (Use == Ptr.getNode())
7438  continue;
7439
7440  // If all the uses are load / store addresses, then don't do the
7441  // transformation.
7442  if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
7443  bool RealUse = false;
7444  for (SDNode::use_iterator III = Use->use_begin(),
7445  EEE = Use->use_end(); III != EEE; ++III) {
7446  SDNode *UseUse = *III;
7447  if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
7448  RealUse = true;
7449  }
7450
7451  if (!RealUse) {
7452  TryNext = true;
7453  break;
7454  }
7455  }
7456  }
7457
7458  if (TryNext)
7459  continue;
7460
7461  // Check for #2
7462  if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
7463  SDValue Result = isLoad
7464  ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
7465  BasePtr, Offset, AM)
7466  : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
7467  BasePtr, Offset, AM);
7468  ++PostIndexedNodes;
7469  ++NodesCombined;
7470  DEBUG(dbgs() << "\nReplacing.5 ";
7471  N->dump(&DAG);
7472  dbgs() << "\nWith: ";
7473  Result.getNode()->dump(&DAG);
7474  dbgs() << '\n');
7475  WorkListRemover DeadNodes(*this);
  // Result numbering as used in this function: an indexed load's value 0
  // replaces the loaded value and value 2 the chain; an indexed store's
  // value 1 replaces the chain. The remaining result (load value 1, store
  // value 0) replaces Op below.
7476  if (isLoad) {
7477  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
7478  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
7479  } else {
7480  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
7481  }
7482
7483  // Finally, since the node is now dead, remove it from the graph.
7484  DAG.DeleteNode(N);
7485
7486  // Replace the uses of Op with uses of the updated base value.
7487  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
7488  Result.getValue(isLoad ? 1 : 0));
7489  removeFromWorkList(Op);
7490  DAG.DeleteNode(Op);
7491  return true;
7492  }
7493  }
7494  }
7495
7496  return false;
7497 }
7498 
/// visitLOAD - Combine a load node.
///
/// Steps tried, in order:
///   - delete a non-volatile load whose value results are all unused,
///     redirecting users of its chain to the incoming chain;
///   - forward the stored value when the chain is a non-truncating store to
///     the same pointer with the same value type;
///   - rebuild the load with a larger alignment when DAG.InferPtrAlignment()
///     reports one (skipped at CodeGenOpt::None and for indexed loads);
///   - when UseAA is set, re-chain the load on the result of FindBetterChain();
///   - try the pre-/post-indexed forms and finally SliceUpLoad().
///
/// \returns the replacement value, SDValue(N, 0) when N was handled in place,
/// or an empty SDValue when nothing applied.
7499 SDValue DAGCombiner::visitLOAD(SDNode *N) {
7500  LoadSDNode *LD = cast<LoadSDNode>(N);
7501  SDValue Chain = LD->getChain();
7502  SDValue Ptr = LD->getBasePtr();
7503
7504  // If load is not volatile and there are no uses of the loaded value (and
7505  // the updated indexed value in case of indexed loads), change uses of the
7506  // chain value into uses of the chain input (i.e. delete the dead load).
7507  if (!LD->isVolatile()) {
  // Result 1 being the chain (MVT::Other) identifies an unindexed load; an
  // indexed load has its chain at result 2 (asserted below).
7508  if (N->getValueType(1) == MVT::Other) {
7509  // Unindexed loads.
7510  if (!N->hasAnyUseOfValue(0)) {
7511  // It's not safe to use the two value CombineTo variant here. e.g.
7512  // v1, chain2 = load chain1, loc
7513  // v2, chain3 = load chain2, loc
7514  // v3 = add v2, c
7515  // Now we replace use of chain2 with chain1. This makes the second load
7516  // isomorphic to the one we are deleting, and thus makes this load live.
7517  DEBUG(dbgs() << "\nReplacing.6 ";
7518  N->dump(&DAG);
7519  dbgs() << "\nWith chain: ";
7520  Chain.getNode()->dump(&DAG);
7521  dbgs() << "\n");
7522  WorkListRemover DeadNodes(*this);
7523  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7524
7525  if (N->use_empty()) {
7526  removeFromWorkList(N);
7527  DAG.DeleteNode(N);
7528  }
7529
7530  return SDValue(N, 0); // Return N so it doesn't get rechecked!
7531  }
7532  } else {
7533  // Indexed loads.
7534  assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
7535  if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
7536  SDValue Undef = DAG.getUNDEF(N->getValueType(0));
7537  DEBUG(dbgs() << "\nReplacing.7 ";
7538  N->dump(&DAG);
7539  dbgs() << "\nWith: ";
7540  Undef.getNode()->dump(&DAG);
7541  dbgs() << " and 2 other values\n");
7542  WorkListRemover DeadNodes(*this);
7543  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
7544  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
7545  DAG.getUNDEF(N->getValueType(1)));
7546  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
7547  removeFromWorkList(N);
7548  DAG.DeleteNode(N);
7549  return SDValue(N, 0); // Return N so it doesn't get rechecked!
7550  }
7551  }
7552  }
7553
7554  // If this load is directly stored, replace the load value with the stored
7555  // value.
7556  // TODO: Handle store large -> read small portion.
7557  // TODO: Handle TRUNCSTORE/LOADEXT
7558  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
7559  if (ISD::isNON_TRUNCStore(Chain.getNode())) {
7560  StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
7561  if (PrevST->getBasePtr() == Ptr &&
7562  PrevST->getValue().getValueType() == N->getValueType(0))
7563  return CombineTo(N, Chain.getOperand(1), Chain);
7564  }
7565  }
7566
7567  // Try to infer better alignment information than the load already has.
7568  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
7569  if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
7570  if (Align > LD->getMemOperand()->getBaseAlignment()) {
7571  SDValue NewLoad =
7572  DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
7573  LD->getValueType(0),
7574  Chain, Ptr, LD->getPointerInfo(),
7575  LD->getMemoryVT(),
7576  LD->isVolatile(), LD->isNonTemporal(), Align,
7577  LD->getTBAAInfo());
7578  return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
7579  }
7580  }
7581  }
7582
  // An explicit -combiner-alias-analysis setting on the command line takes
  // precedence over the fallback expression.
7583  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
  // NOTE(review): the embedded listing numbers jump from 7583 to 7585, so the
  // fallback operand of the ?: above is missing from this copy -- restore it
  // from upstream.
7585  if (UseAA) {
7586  // Walk up chain skipping non-aliasing memory nodes.
7587  SDValue BetterChain = FindBetterChain(N, Chain);
7588
7589  // If there is a better chain.
7590  if (Chain != BetterChain) {
7591  SDValue ReplLoad;
7592
7593  // Replace the chain to avoid the dependency.
7594  if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
7595  ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
7596  BetterChain, Ptr, LD->getMemOperand());
7597  } else {
7598  ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
7599  LD->getValueType(0),
7600  BetterChain, Ptr, LD->getMemoryVT(),
7601  LD->getMemOperand());
7602  }
7603
7604  // Create token factor to keep old chain connected.
7605  SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
7606  MVT::Other, Chain, ReplLoad.getValue(1));
7607
7608  // Make sure the new and old chains are cleaned up.
7609  AddToWorkList(Token.getNode());
7610
7611  // Replace uses with load result and token factor. Don't add users
7612  // to work list.
7613  return CombineTo(N, ReplLoad.getValue(0), Token, false);
7614  }
7615  }
7616
7617  // Try transforming N to an indexed load.
7618  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
7619  return SDValue(N, 0);
7620
7621  // Try to slice up N to more direct loads if the slices are mapped to
7622  // different register banks or pairing can take place.
7623  if (SliceUpLoad(N))
7624  return SDValue(N, 0);
7625
7626  return SDValue();
7627 }
7628 
7629 namespace {
7630 /// \brief Helper structure used to slice a load in smaller loads.
7631 /// Basically a slice is obtained from the following sequence:
7632 /// Origin = load Ty1, Base
7633 /// Shift = srl Ty1 Origin, CstTy Amount
7634 /// Inst = trunc Shift to Ty2
7635 ///
7636 /// Then, it will be rewritten into:
7637 /// Slice = load SliceTy, Base + SliceOffset
7638 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
7639 ///
7640 /// SliceTy is deduced from the number of bits that are actually used to
7641 /// build Inst.
7642 struct LoadedSlice {
7643  /// \brief Helper structure used to compute the cost of a slice.
7644  struct Cost {
7645  /// Are we optimizing for code size.
7646  bool ForCodeSize;
7647  /// Various cost.
7648  unsigned Loads;
7649  unsigned Truncates;
7650  unsigned CrossRegisterBanksCopies;
7651  unsigned ZExts;
7652  unsigned Shift;
7653 
7654  Cost(bool ForCodeSize = false)
7655  : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
7656  CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
7657 
7658  /// \brief Get the cost of one isolated slice.
7659  Cost(const LoadedSlice &LS, bool ForCodeSize = false)
7660  : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
7661  CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
7662  EVT TruncType = LS.Inst->getValueType(0);
7663  EVT LoadedType = LS.getLoadedType();
7664  if (TruncType != LoadedType &&
7665  !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
7666  ZExts = 1;
7667  }
7668 
7669  /// \brief Account for slicing gain in the current cost.
7670  /// Slicing provide a few gains like removing a shift or a
7671  /// truncate. This method allows to grow the cost of the original
7672  /// load with the gain from this slice.
7673  void addSliceGain(const LoadedSlice &LS) {
7674  // Each slice saves a truncate.
7675  const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
7676  if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
7677  LS.Inst->getOperand(0).getValueType()))
7678  ++Truncates;
7679  // If there is a shift amount, this slice gets rid of it.
7680  if (LS.Shift)
7681  ++Shift;
7682  // If this slice can merge a cross register bank copy, account for it.
7683  if (LS.canMergeExpensiveCrossRegisterBankCopy())
7684  ++CrossRegisterBanksCopies;
7685  }
7686 
7687  Cost &operator+=(const Cost &RHS) {
7688  Loads += RHS.Loads;
7689  Truncates += RHS.Truncates;
7690  CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
7691  ZExts += RHS.ZExts;
7692  Shift += RHS.Shift;
7693  return *this;
7694  }
7695 
7696  bool operator==(const Cost &RHS) const {
7697  return Loads == RHS.Loads && Truncates == RHS.Truncates &&
7698  CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
7699  ZExts == RHS.ZExts && Shift == RHS.Shift;
7700  }
7701 
7702  bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
7703 
7704  bool operator<(const Cost &RHS) const {
7705  // Assume cross register banks copies are as expensive as loads.
7706  // FIXME: Do we want some more target hooks?
7707  unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
7708  unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
7709  // Unless we are optimizing for code size, consider the
7710  // expensive operation first.
7711  if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
7712  return ExpensiveOpsLHS < ExpensiveOpsRHS;
7713  return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
7714  (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
7715  }
7716 
7717  bool operator>(const Cost &RHS) const { return RHS < *this; }
7718 
7719  bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
7720 
7721  bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
7722  };
7723  // The last instruction that represent the slice. This should be a
7724  // truncate instruction.
7725  SDNode *Inst;
7726  // The original load instruction.
7727  LoadSDNode *Origin;
7728  // The right shift amount in bits from the original load.
7729  unsigned Shift;
7730  // The DAG from which Origin came from.
7731  // This is used to get some contextual information about legal types, etc.
7732  SelectionDAG *DAG;
7733 
7734  LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
7735  unsigned Shift = 0, SelectionDAG *DAG = NULL)
7736  : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
7737 
7738  LoadedSlice(const LoadedSlice &LS)
7739  : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
7740 
7741  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
7742  /// \return Result is \p BitWidth and has used bits set to 1 and
7743  /// not used bits set to 0.
7744  APInt getUsedBits() const {
7745  // Reproduce the trunc(lshr) sequence:
7746  // - Start from the truncated value.
7747  // - Zero extend to the desired bit width.
7748  // - Shift left.
7749  assert(Origin && "No original load to compare against.");
7750  unsigned BitWidth = Origin->getValueSizeInBits(0);
7751  assert(Inst && "This slice is not bound to an instruction");
7752  assert(Inst->getValueSizeInBits(0) <= BitWidth &&
7753  "Extracted slice is bigger than the whole type!");
7754  APInt UsedBits(Inst->getValueSizeInBits(0), 0);
7755  UsedBits.setAllBits();
7756  UsedBits = UsedBits.zext(BitWidth);
7757  UsedBits <<= Shift;
7758  return UsedBits;
7759  }
7760 
7761  /// \brief Get the size of the slice to be loaded in bytes.
7762  unsigned getLoadedSize() const {
7763  unsigned SliceSize = getUsedBits().countPopulation();
7764  assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
7765  return SliceSize / 8;
7766  }
7767 
7768  /// \brief Get the type that will be loaded for this slice.
7769  /// Note: This may not be the final type for the slice.
7770  EVT getLoadedType() const {
7771  assert(DAG && "Missing context");
7772  LLVMContext &Ctxt = *DAG->getContext();
7773  return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
7774  }
7775 
7776  /// \brief Get the alignment of the load used for this slice.
7777  unsigned getAlignment() const {
7778  unsigned Alignment = Origin->getAlignment();
7779  unsigned Offset = getOffsetFromBase();
7780  if (Offset != 0)
7781  Alignment = MinAlign(Alignment, Alignment + Offset);
7782  return Alignment;
7783  }
7784 
7785  /// \brief Check if this slice can be rewritten with legal operations.
7786  bool isLegal() const {
7787  // An invalid slice is not legal.
7788  if (!Origin || !Inst || !DAG)
7789  return false;
7790 
7791  // Offsets are for indexed load only, we do not handle that.
7792  if (Origin->getOffset().getOpcode() != ISD::UNDEF)
7793  return false;
7794 
7795  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
7796 
7797  // Check that the type is legal.
7798  EVT SliceType = getLoadedType();
7799  if (!TLI.isTypeLegal(SliceType))
7800  return false;
7801 
7802  // Check that the load is legal for this type.
7803  if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
7804  return false;
7805 
7806  // Check that the offset can be computed.
7807  // 1. Check its type.
7808  EVT PtrType = Origin->getBasePtr().getValueType();
7809  if (PtrType == MVT::Untyped || PtrType.isExtended())
7810  return false;
7811 
7812  // 2. Check that it fits in the immediate.
7813  if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
7814  return false;
7815 
7816  // 3. Check that the computation is legal.
7817  if (!TLI.isOperationLegal(ISD::ADD, PtrType))
7818  return false;
7819 
7820  // Check that the zext is legal if it needs one.
7821  EVT TruncateType = Inst->getValueType(0);
7822  if (TruncateType != SliceType &&
7823  !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
7824  return false;
7825 
7826  return true;
7827  }
7828 
7829  /// \brief Get the offset in bytes of this slice in the original chunk of
7830  /// bits.
7831  /// \pre DAG != NULL.
7832  uint64_t getOffsetFromBase() const {
7833  assert(DAG && "Missing context.");
7834  bool IsBigEndian =
7835  DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
7836  assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
7837  uint64_t Offset = Shift / 8;
7838  unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
7839  assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
7840  "The size of the original loaded type is not a multiple of a"
7841  " byte.");
7842  // If Offset is bigger than TySizeInBytes, it means we are loading all
7843  // zeros. This should have been optimized before in the process.
7844  assert(TySizeInBytes > Offset &&
7845  "Invalid shift amount for given loaded size");
7846  if (IsBigEndian)
7847  Offset = TySizeInBytes - Offset - getLoadedSize();
7848  return Offset;
7849  }
7850 
7851  /// \brief Generate the sequence of instructions to load the slice
7852  /// represented by this object and redirect the uses of this slice to
7853  /// this new sequence of instructions.
7854  /// \pre this->Inst && this->Origin are valid Instructions and this
7855  /// object passed the legal check: LoadedSlice::isLegal returned true.
7856  /// \return The last instruction of the sequence used to load the slice.
7857  SDValue loadSlice() const {
7858  assert(Inst && Origin && "Unable to replace a non-existing slice.");
7859  const SDValue &OldBaseAddr = Origin->getBasePtr();
7860  SDValue BaseAddr = OldBaseAddr;
7861  // Get the offset in that chunk of bytes w.r.t. the endianess.
7862  int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
7863  assert(Offset >= 0 && "Offset too big to fit in int64_t!");
7864  if (Offset) {
7865  // BaseAddr = BaseAddr + Offset.
7866  EVT ArithType = BaseAddr.getValueType();
7867  BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
7868  DAG->getConstant(Offset, ArithType));
7869  }
7870 
7871  // Create the type of the loaded slice according to its size.
7872  EVT SliceType = getLoadedType();
7873 
7874  // Create the load for the slice.
7875  SDValue LastInst = DAG->getLoad(
7876  SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
7877  Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
7878  Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
7879  // If the final type is not the same as the loaded type, this means that
7880  // we have to pad with zero. Create a zero extend for that.
7881  EVT FinalType = Inst->getValueType(0);
7882  if (SliceType != FinalType)
7883  LastInst =
7884  DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
7885  return LastInst;
7886  }
7887 
7888  /// \brief Check if this slice can be merged with an expensive cross register
7889  /// bank copy. E.g.,
7890  /// i = load i32
7891  /// f = bitcast i32 i to float
7892  bool canMergeExpensiveCrossRegisterBankCopy() const {
7893  if (!Inst || !Inst->hasOneUse())
7894  return false;
7895  SDNode *Use = *Inst->use_begin();
7896  if (Use->getOpcode() != ISD::BITCAST)
7897  return false;
7898  assert(DAG && "Missing context");
7899  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
7900  EVT ResVT = Use->getValueType(0);
7901  const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
7902  const TargetRegisterClass *ArgRC =
7904  if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
7905  return false;
7906 
7907  // At this point, we know that we perform a cross-register-bank copy.
7908  // Check if it is expensive.
7910  // Assume bitcasts are cheap, unless both register classes do not
7911  // explicitly share a common sub class.
7912  if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
7913  return false;
7914 
7915  // Check if it will be merged with the load.
7916  // 1. Check the alignment constraint.
7917  unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
7918  ResVT.getTypeForEVT(*DAG->getContext()));
7919 
7920  if (RequiredAlignment > getAlignment())
7921  return false;
7922 
7923  // 2. Check that the load is a legal operation for that type.
7924  if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
7925  return false;
7926 
7927  // 3. Check that we do not have a zext in the way.
7928  if (Inst->getValueType(0) != getLoadedType())
7929  return false;
7930 
7931  return true;
7932  }
7933 };
7934 }
7935 
7936 /// \brief Sorts LoadedSlice according to their offset.
7938  bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
7939  assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
7940  return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
7941  }
7942 };
7943 
7944 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
7945 /// \p UsedBits looks like 0..0 1..1 0..0.
7946 static bool areUsedBitsDense(const APInt &UsedBits) {
7947  // If all the bits are one, this is dense!
7948  if (UsedBits.isAllOnesValue())
7949  return true;
7950 
7951  // Get rid of the unused bits on the right.
7952  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
7953  // Get rid of the unused bits on the left.
7954  if (NarrowedUsedBits.countLeadingZeros())
7955  NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
7956  // Check that the chunk of bits is completely used.
7957  return NarrowedUsedBits.isAllOnesValue();
7958 }
7959 
7960 /// \brief Check whether or not \p First and \p Second are next to each other
7961 /// in memory. This means that there is no hole between the bits loaded
7962 /// by \p First and the bits loaded by \p Second.
7963 static bool areSlicesNextToEachOther(const LoadedSlice &First,
7964  const LoadedSlice &Second) {
7965  assert(First.Origin == Second.Origin && First.Origin &&
7966  "Unable to match different memory origins.");
7967  APInt UsedBits = First.getUsedBits();
7968  assert((UsedBits & Second.getUsedBits()) == 0 &&
7969  "Slices are not supposed to overlap.");
7970  UsedBits |= Second.getUsedBits();
7971  return areUsedBitsDense(UsedBits);
7972 }
7973 
7974 /// \brief Adjust the \p GlobalLSCost according to the target
7975 /// paring capabilities and the layout of the slices.
7976 /// \pre \p GlobalLSCost should account for at least as many loads as
7977 /// there is in the slices in \p LoadedSlices.
7979  LoadedSlice::Cost &GlobalLSCost) {
7980  unsigned NumberOfSlices = LoadedSlices.size();
7981  // If there is less than 2 elements, no pairing is possible.
7982  if (NumberOfSlices < 2)
7983  return;
7984 
7985  // Sort the slices so that elements that are likely to be next to each
7986  // other in memory are next to each other in the list.
7987  std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
7988  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
7989  // First (resp. Second) is the first (resp. Second) potentially candidate
7990  // to be placed in a paired load.
7991  const LoadedSlice *First = NULL;
7992  const LoadedSlice *Second = NULL;
7993  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
7994  // Set the beginning of the pair.
7995  First = Second) {
7996 
7997  Second = &LoadedSlices[CurrSlice];
7998 
7999  // If First is NULL, it means we start a new pair.
8000  // Get to the next slice.
8001  if (!First)
8002  continue;
8003 
8004  EVT LoadedType = First->getLoadedType();
8005 
8006  // If the types of the slices are different, we cannot pair them.
8007  if (LoadedType != Second->getLoadedType())
8008  continue;
8009 
8010  // Check if the target supplies paired loads for this type.
8011  unsigned RequiredAlignment = 0;
8012  if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
8013  // move to the next pair, this type is hopeless.
8014  Second = NULL;
8015  continue;
8016  }
8017  // Check if we meet the alignment requirement.
8018  if (RequiredAlignment > First->getAlignment())
8019  continue;
8020 
8021  // Check that both loads are next to each other in memory.
8022  if (!areSlicesNextToEachOther(*First, *Second))
8023  continue;
8024 
8025  assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
8026  --GlobalLSCost.Loads;
8027  // Move to the next pair.
8028  Second = NULL;
8029  }
8030 }
8031 
8032 /// \brief Check the profitability of all involved LoadedSlice.
8033 /// Currently, it is considered profitable if there is exactly two
8034 /// involved slices (1) which are (2) next to each other in memory, and
8035 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
8036 ///
8037 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
8038 /// the elements themselves.
8039 ///
8040 /// FIXME: When the cost model will be mature enough, we can relax
8041 /// constraints (1) and (2).
8043  const APInt &UsedBits, bool ForCodeSize) {
8044  unsigned NumberOfSlices = LoadedSlices.size();
8045  if (StressLoadSlicing)
8046  return NumberOfSlices > 1;
8047 
8048  // Check (1).
8049  if (NumberOfSlices != 2)
8050  return false;
8051 
8052  // Check (2).
8053  if (!areUsedBitsDense(UsedBits))
8054  return false;
8055 
8056  // Check (3).
8057  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
8058  // The original code has one big load.
8059  OrigCost.Loads = 1;
8060  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
8061  const LoadedSlice &LS = LoadedSlices[CurrSlice];
8062  // Accumulate the cost of all the slices.
8063  LoadedSlice::Cost SliceCost(LS, ForCodeSize);
8064  GlobalSlicingCost += SliceCost;
8065 
8066  // Account as cost in the original configuration the gain obtained
8067  // with the current slices.
8068  OrigCost.addSliceGain(LS);
8069  }
8070 
8071  // If the target supports paired load, adjust the cost accordingly.
8072  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
8073  return OrigCost > GlobalSlicingCost;
8074 }
8075 
8076 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
8077 /// operations, split it in the various pieces being extracted.
8078 ///
8079 /// This sort of thing is introduced by SROA.
8080 /// This slicing takes care not to insert overlapping loads.
8081 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
8082 bool DAGCombiner::SliceUpLoad(SDNode *N) {
8083  if (Level < AfterLegalizeDAG)
8084  return false;
8085 
8086  LoadSDNode *LD = cast<LoadSDNode>(N);
8087  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
8088  !LD->getValueType(0).isInteger())
8089  return false;
8090 
8091  // Keep track of already used bits to detect overlapping values.
8092  // In that case, we will just abort the transformation.
8093  APInt UsedBits(LD->getValueSizeInBits(0), 0);
8094 
8095  SmallVector<LoadedSlice, 4> LoadedSlices;
8096 
8097  // Check if this load is used as several smaller chunks of bits.
8098  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
8099  // of computation for each trunc.
8100  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
8101  UI != UIEnd; ++UI) {
8102  // Skip the uses of the chain.
8103  if (UI.getUse().getResNo() != 0)
8104  continue;
8105 
8106  SDNode *User = *UI;
8107  unsigned Shift = 0;
8108 
8109  // Check if this is a trunc(lshr).
8110  if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
8111  isa<ConstantSDNode>(User->getOperand(1))) {
8112  Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
8113  User = *User->use_begin();
8114  }
8115 
8116  // At this point, User is a Truncate, iff we encountered, trunc or
8117  // trunc(lshr).
8118  if (User->getOpcode() != ISD::TRUNCATE)
8119  return false;
8120 
8121  // The width of the type must be a power of 2 and greater than 8-bits.
8122  // Otherwise the load cannot be represented in LLVM IR.
8123  // Moreover, if we shifted with a non 8-bits multiple, the slice
8124  // will be accross several bytes. We do not support that.
8125  unsigned Width = User->getValueSizeInBits(0);
8126  if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
8127  return 0;
8128 
8129  // Build the slice for this chain of computations.
8130  LoadedSlice LS(User, LD, Shift, &DAG);
8131  APInt CurrentUsedBits = LS.getUsedBits();
8132 
8133  // Check if this slice overlaps with another.
8134  if ((CurrentUsedBits & UsedBits) != 0)
8135  return false;
8136  // Update the bits used globally.
8137  UsedBits |= CurrentUsedBits;
8138 
8139  // Check if the new slice would be legal.
8140  if (!LS.isLegal())
8141  return false;
8142 
8143  // Record the slice.
8144  LoadedSlices.push_back(LS);
8145  }
8146 
8147  // Abort slicing if it does not seem to be profitable.
8148  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
8149  return false;
8150 
8151  ++SlicedLoads;
8152 
8153  // Rewrite each chain to use an independent load.
8154  // By construction, each chain can be represented by a unique load.
8155 
8156  // Prepare the argument for the new token factor for all the slices.
8157  SmallVector<SDValue, 8> ArgChains;
8159  LSIt = LoadedSlices.begin(),
8160  LSItEnd = LoadedSlices.end();
8161  LSIt != LSItEnd; ++LSIt) {
8162  SDValue SliceInst = LSIt->loadSlice();
8163  CombineTo(LSIt->Inst, SliceInst, true);
8164  if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
8165  SliceInst = SliceInst.getOperand(0);
8166  assert(SliceInst->getOpcode() == ISD::LOAD &&
8167  "It takes more than a zext to get to the loaded slice!!");
8168  ArgChains.push_back(SliceInst.getValue(1));
8169  }
8170 
8171  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
8172  &ArgChains[0], ArgChains.size());
8173  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
8174  return true;
8175 }
8176 
8177 /// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
8178 /// load is having specific bytes cleared out. If so, return the byte size
8179 /// being masked out and the shift amount.
8180 static std::pair<unsigned, unsigned>
8182  std::pair<unsigned, unsigned> Result(0, 0);
8183 
8184  // Check for the structure we're looking for.
8185  if (V->getOpcode() != ISD::AND ||
8186  !isa<ConstantSDNode>(V->getOperand(1)) ||
8188  return Result;
8189 
8190  // Check the chain and pointer.
8191  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
8192  if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
8193 
8194  // The store should be chained directly to the load or be an operand of a
8195  // tokenfactor.
8196  if (LD == Chain.getNode())
8197  ; // ok.
8198  else if (Chain->getOpcode() != ISD::TokenFactor)
8199  return Result; // Fail.
8200  else {
8201  bool isOk = false;
8202  for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
8203  if (Chain->getOperand(i).getNode() == LD) {
8204  isOk = true;
8205  break;
8206  }
8207  if (!isOk) return Result;
8208  }
8209 
8210  // This only handles simple types.
8211  if (V.getValueType() != MVT::i16 &&
8212  V.getValueType() != MVT::i32 &&
8213  V.getValueType() != MVT::i64)
8214  return Result;
8215 
8216  // Check the constant mask. Invert it so that the bits being masked out are
8217  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
8218  // follow the sign bit for uniformity.
8219  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
8220  unsigned NotMaskLZ = countLeadingZeros(NotMask);
8221  if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
8222  unsigned NotMaskTZ = countTrailingZeros(NotMask);
8223  if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
8224  if (NotMaskLZ == 64) return Result; // All zero mask.
8225 
8226  // See if we have a continuous run of bits. If so, we have 0*1+0*
8227  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
8228  return Result;
8229 
8230  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
8231  if (V.getValueType() != MVT::i64 && NotMaskLZ)
8232  NotMaskLZ -= 64-V.getValueSizeInBits();
8233 
8234  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
8235  switch (MaskedBytes) {
8236  case 1:
8237  case 2:
8238  case 4: break;
8239  default: return Result; // All one mask, or 5-byte mask.
8240  }
8241 
8242  // Verify that the first bit starts at a multiple of mask so that the access
8243  // is aligned the same as the access width.
8244  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
8245 
8246  Result.first = MaskedBytes;
8247  Result.second = NotMaskTZ/8;
8248  return Result;
8249 }
8250 
8251 
8252 /// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
8253 /// provides a value as specified by MaskInfo. If so, replace the specified
8254 /// store with a narrower store of truncated IVal.
8255 static SDNode *
8256 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
8257  SDValue IVal, StoreSDNode *St,
8258  DAGCombiner *DC) {
8259  unsigned NumBytes = MaskInfo.first;
8260  unsigned ByteShift = MaskInfo.second;
8261  SelectionDAG &DAG = DC->getDAG();
8262 
8263  // Check to see if IVal is all zeros in the part being masked in by the 'or'
8264  // that uses this. If not, this is not a replacement.
8265  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
8266  ByteShift*8, (ByteShift+NumBytes)*8);
8267  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
8268 
8269  // Check that it is legal on the target to do this. It is legal if the new
8270  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
8271  // legalization.
8272  MVT VT = MVT::getIntegerVT(NumBytes*8);
8273  if (!DC->isTypeLegal(VT))
8274  return 0;
8275 
8276  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
8277  // shifted by ByteShift and truncated down to NumBytes.
8278  if (ByteShift)
8279  IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
8280  DAG.getConstant(ByteShift*8,
8281  DC->getShiftAmountTy(IVal.getValueType())));
8282 
8283  // Figure out the offset for the store and the alignment of the access.
8284  unsigned StOffset;
8285  unsigned NewAlign = St->getAlignment();
8286 
8288  StOffset = ByteShift;
8289  else
8290  StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
8291 
8292  SDValue Ptr = St->getBasePtr();
8293  if (StOffset) {
8294  Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
8295  Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
8296  NewAlign = MinAlign(NewAlign, StOffset);
8297  }
8298 
8299  // Truncate down to the new size.
8300  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
8301 
8302  ++OpsNarrowed;
8303  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
8304  St->getPointerInfo().getWithOffset(StOffset),
8305  false, false, NewAlign).getNode();
8306 }
8307 
8308 
8309 /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
8310 /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
8311 /// of the loaded bits, try narrowing the load and store if it would end up
8312 /// being a win for performance or code size.
8313 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
8314  StoreSDNode *ST = cast<StoreSDNode>(N);
8315  if (ST->isVolatile())
8316  return SDValue();
8317 
8318  SDValue Chain = ST->getChain();
8319  SDValue Value = ST->getValue();
8320  SDValue Ptr = ST->getBasePtr();
8321  EVT VT = Value.getValueType();
8322 
8323  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
8324  return SDValue();
8325 
8326  unsigned Opc = Value.getOpcode();
8327 
8328  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
8329  // is a byte mask indicating a consecutive number of bytes, check to see if
8330  // Y is known to provide just those bytes. If so, we try to replace the
8331  // load + replace + store sequence with a single (narrower) store, which makes
8332  // the load dead.
8333  if (Opc == ISD::OR) {
8334  std::pair<unsigned, unsigned> MaskedLoad;
8335  MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
8336  if (MaskedLoad.first)
8337  if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
8338  Value.getOperand(1), ST,this))
8339  return SDValue(NewST, 0);
8340 
8341  // Or is commutative, so try swapping X and Y.
8342  MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
8343  if (MaskedLoad.first)
8344  if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
8345  Value.getOperand(0), ST,this))
8346  return SDValue(NewST, 0);
8347  }
8348 
8349  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
8350  Value.getOperand(1).getOpcode() != ISD::Constant)
8351  return SDValue();
8352 
8353  SDValue N0 = Value.getOperand(0);
8354  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8355  Chain == SDValue(N0.getNode(), 1)) {
8356  LoadSDNode *LD = cast<LoadSDNode>(N0);
8357  if (LD->getBasePtr() != Ptr ||
8358  LD->getPointerInfo().getAddrSpace() !=
8359  ST->getPointerInfo().getAddrSpace())
8360  return SDValue();
8361 
8362  // Find the type to narrow it the load / op / store to.
8363  SDValue N1 = Value.getOperand(1);
8364  unsigned BitWidth = N1.getValueSizeInBits();
8365  APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
8366  if (Opc == ISD::AND)
8367  Imm ^= APInt::getAllOnesValue(BitWidth);
8368  if (Imm == 0 || Imm.isAllOnesValue())
8369  return SDValue();
8370  unsigned ShAmt = Imm.countTrailingZeros();
8371  unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
8372  unsigned NewBW = NextPowerOf2(MSB - ShAmt);
8373  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
8374  while (NewBW < BitWidth &&
8375  !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
8376  TLI.isNarrowingProfitable(VT, NewVT))) {
8377  NewBW = NextPowerOf2(NewBW);
8378  NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
8379  }
8380  if (NewBW >= BitWidth)
8381  return SDValue();
8382 
8383  // If the lsb changed does not start at the type bitwidth boundary,
8384  // start at the previous one.
8385  if (ShAmt % NewBW)
8386  ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
8387  APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
8388  std::min(BitWidth, ShAmt + NewBW));
8389  if ((Imm & Mask) == Imm) {
8390  APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
8391  if (Opc == ISD::AND)
8392  NewImm ^= APInt::getAllOnesValue(NewBW);
8393  uint64_t PtrOff = ShAmt / 8;
8394  // For big endian targets, we need to adjust the offset to the pointer to
8395  // load the correct bytes.
8396  if (TLI.isBigEndian())
8397  PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
8398 
8399  unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
8400  Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
8401  if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
8402  return SDValue();
8403 
8404  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
8405  Ptr.getValueType(), Ptr,
8406  DAG.getConstant(PtrOff, Ptr.getValueType()));
8407  SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
8408  LD->getChain(), NewPtr,
8409  LD->getPointerInfo().getWithOffset(PtrOff),
8410  LD->isVolatile(), LD->isNonTemporal(),
8411  LD->isInvariant(), NewAlign,
8412  LD->getTBAAInfo());
8413  SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
8414  DAG.getConstant(NewImm, NewVT));
8415  SDValue NewST = DAG.getStore(Chain, SDLoc(N),
8416  NewVal, NewPtr,
8417  ST->getPointerInfo().getWithOffset(PtrOff),
8418  false, false, NewAlign);
8419 
8420  AddToWorkList(NewPtr.getNode());
8421  AddToWorkList(NewLD.getNode());
8422  AddToWorkList(NewVal.getNode());
8423  WorkListRemover DeadNodes(*this);
8424  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
8425  ++OpsNarrowed;
8426  return NewST;
8427  }
8428  }
8429 
8430  return SDValue();
8431 }
8432 
8433 /// TransformFPLoadStorePair - For a given floating point load / store pair,
8434 /// if the load value isn't used by any other operations, then consider
8435 /// transforming the pair to integer load / store operations if the target
8436 /// deems the transformation profitable.
///
/// \param N  The store node to examine; it is cast to StoreSDNode.
/// \returns The replacement integer store on success, or an empty SDValue when
/// the pattern does not match or one of the checks below rejects it.
8437 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
8438  StoreSDNode *ST = cast<StoreSDNode>(N);
8439  SDValue Chain = ST->getChain();
8440  SDValue Value = ST->getValue();
 // Match a normal store whose stored value is a normal load with exactly one
 // use, and whose incoming chain is that load's chain result (value #1).
8441  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
8442  Value.hasOneUse() &&
8443  Chain == SDValue(Value.getNode(), 1)) {
8444  LoadSDNode *LD = cast<LoadSDNode>(Value);
8445  EVT VT = LD->getMemoryVT();
 // Give up unless both accesses use the same floating point memory type,
 // neither is non-temporal, and both are in address space 0.
8446  if (!VT.isFloatingPoint() ||
8447  VT != ST->getMemoryVT() ||
8448  LD->isNonTemporal() ||
8449  ST->isNonTemporal() ||
8450  LD->getPointerInfo().getAddrSpace() != 0 ||
8451  ST->getPointerInfo().getAddrSpace() != 0)
8452  return SDValue();
8453 
 // The integer type of the same bit width must have legal LOAD and STORE.
8454  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
8455  if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
8456  !TLI.isOperationLegal(ISD::STORE, IntVT) ||
 // NOTE(review): this listing jumps from line 8456 to 8459, so the remaining
 // operand(s) of this condition are not visible here.
8459  return SDValue();
8460 
 // Both accesses must be at least ABI-aligned for the integer type.
8461  unsigned LDAlign = LD->getAlignment();
8462  unsigned STAlign = ST->getAlignment();
8463  Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
8464  unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
8465  if (LDAlign < ABIAlign || STAlign < ABIAlign)
8466  return SDValue();
8467 
 // Rebuild the load with the integer type, reusing the original chain, base
 // pointer, pointer info and alignment; the three flag arguments are false.
8468  SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
8469  LD->getChain(), LD->getBasePtr(),
8470  LD->getPointerInfo(),
8471  false, false, false, LDAlign);
8472 
 // The new store is chained directly on the new load's chain result.
8473  SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
8474  NewLD, ST->getBasePtr(),
8475  ST->getPointerInfo(),
8476  false, false, STAlign);
8477 
8478  AddToWorkList(NewLD.getNode());
8479  AddToWorkList(NewST.getNode());
8480  WorkListRemover DeadNodes(*this);
 // Redirect users of the old load's chain result to the new load's chain.
8481  DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
8482  ++LdStFP2Int;
8483  return NewST;
8484  }
8485 
8486  return SDValue();
8487 }
8488 
8489 /// Helper struct to parse and store a memory address as base + index + offset.
8490 /// We ignore sign extensions when it is safe to do so.
8491 /// The following two expressions are not equivalent. To differentiate we need
8492 /// to store whether there was a sign extension involved in the index
8493 /// computation.
8494 /// (load (i64 add (i64 copyfromreg %c)
8495 /// (i64 signextend (add (i8 load %index)
8496 /// (i8 1))))
8497 /// vs
8498 ///
8499 /// (load (i64 add (i64 copyfromreg %c)
8500 /// (i64 signextend (i32 add (i32 signextend (i8 load %index))
8501 /// (i32 1)))))
 // NOTE(review): this listing omits lines 8502-8504 and 8506. The struct
 // header and the declarations of the other members initialised by the
 // constructors below (Base, Index, IsIndexSignExt) are not visible here.
8505  int64_t Offset;
8507 
 /// Default state: zero offset and no sign extension recorded.
8508  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
8509 
 /// Member-wise constructor.
8510  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
8511  bool IsIndexSignExt) :
8512  Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
8513 
 /// Returns true when \p Other has the same Base, Index and sign-extension
 /// flag as this object. The Offset is deliberately not compared.
8514  bool equalBaseIndex(const BaseIndexOffset &Other) {
8515  return Other.Base == Base && Other.Index == Index &&
8516  Other.IsIndexSignExt == IsIndexSignExt;
8517  }
8518 
8519  /// Parses tree in Ptr for base, index, offset addresses.
 // NOTE(review): line 8520 (the signature of this parser) is missing from the
 // listing; presumably it is the static match() helper that other code in this
 // file calls as BaseIndexOffset::match(...) - confirm against the real file.
8521  bool IsIndexSignExt = false;
8522 
8523  // We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
8524  // instruction, then it could be just the BASE or everything else we don't
8525  // know how to handle. Just use Ptr as BASE and give up.
8526  if (Ptr->getOpcode() != ISD::ADD)
8527  return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
8528 
8529  // We know that we have at least an ADD instruction. Try to pattern match
8530  // the simple case of BASE + OFFSET.
8531  if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
8532  int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
8533  return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
8534  IsIndexSignExt);
8535  }
8536 
8537  // Inside a loop the current BASE pointer is calculated using an ADD and a
8538  // MUL instruction. In this case Ptr is the actual BASE pointer.
8539  // (i64 add (i64 %array_ptr)
8540  // (i64 mul (i64 %induction_var)
8541  // (i64 %element_size)))
8542  if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
8543  return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
8544 
8545  // Look at Base + Index + Offset cases.
8546  SDValue Base = Ptr->getOperand(0);
8547  SDValue IndexOffset = Ptr->getOperand(1);
8548 
8549  // Skip signextends.
8550  if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
8551  IndexOffset = IndexOffset->getOperand(0);
8552  IsIndexSignExt = true;
8553  }
8554 
8555  // Either the case of Base + Index (no offset) or something else.
8556  if (IndexOffset->getOpcode() != ISD::ADD)
8557  return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
8558 
8559  // Now we have the case of Base + Index + offset.
8560  SDValue Index = IndexOffset->getOperand(0);
8561  SDValue Offset = IndexOffset->getOperand(1);
8562 
 // A non-constant second operand is not an offset we can use: fall back to
 // treating the whole Ptr as the base.
8563  if (!isa<ConstantSDNode>(Offset))
8564  return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
8565 
8566  // Ignore signextends.
 // Note that the flag is overwritten here, so a sign extension skipped above
 // is only remembered if the index itself is also sign extended.
8567  if (Index->getOpcode() == ISD::SIGN_EXTEND) {
8568  Index = Index->getOperand(0);
8569  IsIndexSignExt = true;
8570  } else IsIndexSignExt = false;
8571 
8572  int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
8573  return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
8574  }
8575 };
8576 
8577 /// Holds a pointer to an LSBaseSDNode as well as information on where it
8578 /// is located in a sequence of memory operations connected by a chain.
8579 struct MemOpLink {
 /// Stores the node, its offset from the shared base pointer, and its
 /// sequence number.
8580  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
8581  MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
8582  // Ptr to the mem node.
 // NOTE(review): listing line 8583 is missing; presumably the declaration of
 // MemNode, which the constructor initialises.
8584  // Offset from the base ptr.
 // NOTE(review): listing line 8585 is missing; presumably the declaration of
 // OffsetFromBase, which the constructor initialises.
8586  // What is the sequence number of this mem node.
8587  // Lowest mem operand in the DAG starts at zero.
8588  unsigned SequenceNum;
8589 };
8590 
8591 /// Sorts store nodes in a link according to their offset from a shared
8592 /// base ptr.
// NOTE(review): listing line 8593 (the header of the struct that owns this
// call operator) is missing, so the functor's name is not visible here.
 /// Strict "less than" on OffsetFromBase; both links are taken by value.
8594  bool operator()(MemOpLink LHS, MemOpLink RHS) {
8595  return LHS.OffsetFromBase < RHS.OffsetFromBase;
8596  }
8597 };
8598 
/// MergeConsecutiveStores - Starting at \p St, walk up the chain collecting
/// stores that share St's base/index, memory type and alignment, then try to
/// replace a run of address-consecutive ones with a single wider store:
///  - for constant values, one integer constant (or an all-zero vector);
///  - for loaded values, one wide load feeding one wide store.
/// \returns true if stores were merged, false otherwise.
8599 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
8600  EVT MemVT = St->getMemoryVT();
8601  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
 // Vector types are not used when the function has the NoImplicitFloat
 // attribute.
8602  bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
8603  hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
8604 
8605  // Don't merge vectors into wider inputs.
8606  if (MemVT.isVector() || !MemVT.isSimple())
8607  return false;
8608 
8609  // Perform an early exit check. Do not bother looking at stored values that
8610  // are not constants or loads.
8611  SDValue StoredVal = St->getValue();
8612  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
8613  if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
8614  !IsLoadSrc)
8615  return false;
8616 
8617  // Only look at ends of store sequences.
8618  SDValue Chain = SDValue(St, 1);
8619  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
8620  return false;
8621 
8622  // This holds the base pointer, index, and the offset in bytes from the base
8623  // pointer.
 // NOTE(review): listing line 8624 is missing; presumably it declares BasePtr
 // (used just below) from St's address - confirm against the real file.
8625 
8626  // We must have a base and an offset.
8627  if (!BasePtr.Base.getNode())
8628  return false;
8629 
8630  // Do not handle stores to undef base pointers.
8631  if (BasePtr.Base.getOpcode() == ISD::UNDEF)
8632  return false;
8633 
8634  // Save the LoadSDNodes that we find in the chain.
8635  // We need to make sure that these nodes do not interfere with
8636  // any of the store nodes.
8637  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
8638 
8639  // Save the StoreSDNodes that we find in the chain.
8640  SmallVector<MemOpLink, 8> StoreNodes;
8641 
8642  // Walk up the chain and look for nodes with offsets from the same
8643  // base pointer. Stop when reaching an instruction with a different kind
8644  // or instruction which has a different base pointer.
 // Seq numbers grow as we walk up, so a larger SequenceNum means a store that
 // sits earlier (higher) in the chain.
8645  unsigned Seq = 0;
8646  StoreSDNode *Index = St;
8647  while (Index) {
8648  // If the chain has more than one use, then we can't reorder the mem ops.
8649  if (Index != St && !SDValue(Index, 1)->hasOneUse())
8650  break;
8651 
8652  // Find the base pointer and offset for this memory node.
 // NOTE(review): listing line 8653 is missing; presumably it declares Ptr
 // (used just below) from Index's address - confirm against the real file.
8654 
8655  // Check that the base pointer is the same as the original one.
8656  if (!Ptr.equalBaseIndex(BasePtr))
8657  break;
8658 
8659  // Check that the alignment is the same.
8660  if (Index->getAlignment() != St->getAlignment())
8661  break;
8662 
8663  // The memory operands must not be volatile.
8664  if (Index->isVolatile() || Index->isIndexed())
8665  break;
8666 
8667  // No truncation.
 // NOTE(review): Index is already a StoreSDNode*, and the local St declared
 // here shadows the function parameter of the same name.
8668  if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
8669  if (St->isTruncatingStore())
8670  break;
8671 
8672  // The stored memory type must be the same.
8673  if (Index->getMemoryVT() != MemVT)
8674  break;
8675 
8676  // We do not allow unaligned stores because we want to prevent overriding
8677  // stores.
8678  if (Index->getAlignment()*8 != MemVT.getSizeInBits())
8679  break;
8680 
8681  // We found a potential memory operand to merge.
8682  StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
8683 
8684  // Find the next memory operand in the chain. If the next operand in the
8685  // chain is a store then move up and continue the scan with the next
8686  // memory operand. If the next operand is a load save it and use alias
8687  // information to check if it interferes with anything.
8688  SDNode *NextInChain = Index->getChain().getNode();
8689  while (1) {
8690  if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
8691  // We found a store node. Use it for the next iteration.
8692  Index = STn;
8693  break;
8694  } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
 // A volatile load ends the whole scan.
8695  if (Ldn->isVolatile()) {
8696  Index = NULL;
8697  break;
8698  }
8699 
8700  // Save the load node for later. Continue the scan.
8701  AliasLoadNodes.push_back(Ldn);
8702  NextInChain = Ldn->getChain().getNode();
8703  continue;
8704  } else {
 // Anything that is neither a store nor a load ends the scan.
8705  Index = NULL;
8706  break;
8707  }
8708  }
8709  }
8710 
8711  // Check if there is anything to merge.
8712  if (StoreNodes.size() < 2)
8713  return false;
8714 
8715  // Sort the memory operands according to their distance from the base pointer.
8716  std::sort(StoreNodes.begin(), StoreNodes.end(),
 // NOTE(review): listing line 8717 (the comparator argument that closes this
 // call) is missing.
8718 
8719  // Scan the memory operations on the chain and find the first non-consecutive
8720  // store memory address.
8721  unsigned LastConsecutiveStore = 0;
8722  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
8723  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
8724 
8725  // Check that the addresses are consecutive starting from the second
8726  // element in the list of stores.
8727  if (i > 0) {
8728  int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
8729  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
8730  break;
8731  }
8732 
8733  bool Alias = false;
8734  // Check if this store interferes with any of the loads that we found.
8735  for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
8736  if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
8737  Alias = true;
8738  break;
8739  }
8740  // We found a load that aliases with this store. Stop the sequence.
8741  if (Alias)
8742  break;
8743 
8744  // Mark this node as useful.
8745  LastConsecutiveStore = i;
8746  }
8747 
8748  // The node with the lowest store address.
8749  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
8750 
8751  // Store the constants into memory as one consecutive store.
8752  if (!IsLoadSrc) {
 // LastLegalType / LastLegalVectorType hold element counts (i+1), not
 // indices; zero means no legal type was found.
8753  unsigned LastLegalType = 0;
8754  unsigned LastLegalVectorType = 0;
8755  bool NonZero = false;
8756  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
8757  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
8758  SDValue StoredVal = St->getValue();
8759 
8760  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
8761  NonZero |= !C->isNullValue();
8762  } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
8763  NonZero |= !C->getConstantFPValue()->isNullValue();
8764  } else {
8765  // Non constant.
8766  break;
8767  }
8768 
8769  // Find a legal type for the constant store.
8770  unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
8771  EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
8772  if (TLI.isTypeLegal(StoreTy))
8773  LastLegalType = i+1;
8774  // Or check whether a truncstore is legal.
8775  else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
 // NOTE(review): listing line 8776 (the right-hand side of this comparison and
 // the opening brace) is missing.
8777  EVT LegalizedStoredValueTy =
8778  TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
8779  if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
8780  LastLegalType = i+1;
8781  }
8782 
8783  // Find a legal type for the vector store.
8784  EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
8785  if (TLI.isTypeLegal(Ty))
8786  LastLegalVectorType = i + 1;
8787  }
8788 
8789  // We only use vectors if the constant is known to be zero and the
8790  // function is not marked with the noimplicitfloat attribute.
8791  if (NonZero || NoVectors)
8792  LastLegalVectorType = 0;
8793 
8794  // Check if we found a legal integer type to store.
8795  if (LastLegalType == 0 && LastLegalVectorType == 0)
8796  return false;
8797 
8798  bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
8799  unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
8800 
8801  // Make sure we have something to merge.
8802  if (NumElem < 2)
8803  return false;
8804 
8805  unsigned EarliestNodeUsed = 0;
8806  for (unsigned i=0; i < NumElem; ++i) {
8807  // Find a chain for the new wide-store operand. Notice that some
8808  // of the store nodes that we found may not be selected for inclusion
8809  // in the wide store. The chain we use needs to be the chain of the
8810  // earliest store node which is *used* and replaced by the wide store.
8811  if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
8812  EarliestNodeUsed = i;
8813  }
8814 
8815  // The earliest Node in the DAG.
8816  LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
8817  SDLoc DL(StoreNodes[0].MemNode);
8818 
8819  SDValue StoredVal;
8820  if (UseVector) {
8821  // Find a legal type for the vector store.
8822  EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
8823  assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
8824  StoredVal = DAG.getConstant(0, Ty);
8825  } else {
8826  unsigned StoreBW = NumElem * ElementSizeBytes * 8;
8827  APInt StoreInt(StoreBW, 0);
8828 
8829  // Construct a single integer constant which is made of the smaller
8830  // constant inputs.
 // Each iteration shifts the accumulator left by one element and ORs the
 // next element into the low bits, so on little-endian targets the elements
 // are visited from the highest address down to the lowest.
8831  bool IsLE = TLI.isLittleEndian();
8832  for (unsigned i = 0; i < NumElem ; ++i) {
8833  unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
8834  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
8835  SDValue Val = St->getValue();
8836  StoreInt<<=ElementSizeBytes*8;
8837  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
8838  StoreInt|=C->getAPIntValue().zext(StoreBW);
8839  } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
8840  StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
8841  } else {
8842  assert(false && "Invalid constant element type");
8843  }
8844  }
8845 
8846  // Create the merged integer constant that will be stored.
8847  EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
8848  StoredVal = DAG.getConstant(StoreInt, StoreTy);
8849  }
8850 
 // The wide store uses the earliest store's chain and the address, pointer
 // info and alignment of the lowest-addressed store.
8851  SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
8852  FirstInChain->getBasePtr(),
8853  FirstInChain->getPointerInfo(),
8854  false, false,
8855  FirstInChain->getAlignment());
8856 
8857  // Replace the first store with the new store
8858  CombineTo(EarliestOp, NewStore);
8859  // Erase all other stores.
8860  for (unsigned i = 0; i < NumElem ; ++i) {
8861  if (StoreNodes[i].MemNode == EarliestOp)
8862  continue;
8863  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
8864  // ReplaceAllUsesWith will replace all uses that existed when it was
8865  // called, but graph optimizations may cause new ones to appear. For
8866  // example, the case in pr14333 looks like
8867  //
8868  // St's chain -> St -> another store -> X
8869  //
8870  // And the only difference from St to the other store is the chain.
8871  // When we change its chain to be St's chain they become identical,
8872  // get CSEed and the net result is that X is now a use of St.
8873  // Since we know that St is redundant, just iterate.
8874  while (!St->use_empty())
8875  DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
8876  removeFromWorkList(St);
8877  DAG.DeleteNode(St);
8878  }
8879 
8880  return true;
8881  }
8882 
8883  // Below we handle the case of multiple consecutive stores that
8884  // come from multiple consecutive loads. We merge them into a single
8885  // wide load and a single wide store.
8886 
8887  // Look for load nodes which are used by the stored values.
8888  SmallVector<MemOpLink, 8> LoadNodes;
8889 
8890  // Find acceptable loads. Loads need to have the same chain (token factor),
8891  // must not be zext, volatile, indexed, and they must be consecutive.
8892  BaseIndexOffset LdBasePtr;
8893  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
8894  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
8895  LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
8896  if (!Ld) break;
8897 
8898  // Loads must only have one use.
8899  if (!Ld->hasNUsesOfValue(1, 0))
8900  break;
8901 
8902  // Check that the alignment is the same as the stores.
8903  if (Ld->getAlignment() != St->getAlignment())
8904  break;
8905 
8906  // The memory operands must not be volatile.
8907  if (Ld->isVolatile() || Ld->isIndexed())
8908  break;
8909 
8910  // We do not accept ext loads.
8911  if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
8912  break;
8913 
8914  // The stored memory type must be the same.
8915  if (Ld->getMemoryVT() != MemVT)
8916  break;
8917 
8918  BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
8919  // If this is not the first ptr that we check.
8920  if (LdBasePtr.Base.getNode()) {
8921  // The base ptr must be the same.
8922  if (!LdPtr.equalBaseIndex(LdBasePtr))
8923  break;
8924  } else {
8925  // Check that all other base pointers are the same as this one.
8926  LdBasePtr = LdPtr;
8927  }
8928 
8929  // We found a potential memory operand to merge.
 // Loads are kept in store order, so LoadNodes[i] feeds StoreNodes[i]; the
 // sequence number is always recorded as 0 here.
8930  LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
8931  }
8932 
8933  if (LoadNodes.size() < 2)
8934  return false;
8935 
8936  // Scan the memory operations on the chain and find the first non-consecutive
8937  // load memory address. These variables hold the index in the store node
8938  // array.
8939  unsigned LastConsecutiveLoad = 0;
8940  // This variable refers to the size and not index in the array.
8941  unsigned LastLegalVectorType = 0;
8942  unsigned LastLegalIntegerType = 0;
8943  StartAddress = LoadNodes[0].OffsetFromBase;
8944  SDValue FirstChain = LoadNodes[0].MemNode->getChain();
8945  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
8946  // All loads must share the same chain.
8947  if (LoadNodes[i].MemNode->getChain() != FirstChain)
8948  break;
8949 
8950  int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
8951  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
8952  break;
8953  LastConsecutiveLoad = i;
8954 
8955  // Find a legal type for the vector store.
8956  EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
8957  if (TLI.isTypeLegal(StoreTy))
8958  LastLegalVectorType = i + 1;
8959 
8960  // Find a legal type for the integer store.
8961  unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
8962  StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
8963  if (TLI.isTypeLegal(StoreTy))
8964  LastLegalIntegerType = i + 1;
8965  // Or check whether a truncstore and extload is legal.
8966  else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
 // NOTE(review): listing line 8967 (the right-hand side of this comparison and
 // the opening brace) is missing.
8968  EVT LegalizedStoredValueTy =
8969  TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
8970  if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
8971  TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
8972  TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
8973  TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
8974  LastLegalIntegerType = i+1;
8975  }
8976  }
8977 
8978  // Only use vector types if the vector type is larger than the integer type.
8979  // If they are the same, use integers.
8980  bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
8981  unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
8982 
8983  // We add +1 here because the LastXXX variables refer to location while
8984  // the NumElem refers to array/index size.
8985  unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
8986  NumElem = std::min(LastLegalType, NumElem);
8987 
8988  if (NumElem < 2)
8989  return false;
8990 
8991  // The earliest Node in the DAG.
 // NOTE(review): EarliestOp is read here, before the loop below updates
 // EarliestNodeUsed, and EarliestNodeUsed is not read again afterwards. As
 // written, EarliestOp is always StoreNodes[0].MemNode, unlike the constant
 // path above, which reads it after its loop. Looks unintended - verify.
8992  unsigned EarliestNodeUsed = 0;
8993  LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
8994  for (unsigned i=1; i<NumElem; ++i) {
8995  // Find a chain for the new wide-store operand. Notice that some
8996  // of the store nodes that we found may not be selected for inclusion
8997  // in the wide store. The chain we use needs to be the chain of the
8998  // earliest store node which is *used* and replaced by the wide store.
8999  if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
9000  EarliestNodeUsed = i;
9001  }
9002 
9003  // Find if it is better to use vectors or integers to load and store
9004  // to memory.
9005  EVT JointMemOpVT;
9006  if (UseVectorTy) {
9007  JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
9008  } else {
9009  unsigned StoreBW = NumElem * ElementSizeBytes * 8;
9010  JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
9011  }
9012 
9013  SDLoc LoadDL(LoadNodes[0].MemNode);
9014  SDLoc StoreDL(StoreNodes[0].MemNode);
9015 
 // Build the wide load from the first load's chain, address, pointer info and
 // alignment.
9016  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
9017  SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
9018  FirstLoad->getChain(),
9019  FirstLoad->getBasePtr(),
9020  FirstLoad->getPointerInfo(),
9021  false, false, false,
9022  FirstLoad->getAlignment());
9023 
9024  SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
9025  FirstInChain->getBasePtr(),
9026  FirstInChain->getPointerInfo(), false, false,
9027  FirstInChain->getAlignment());
9028 
9029  // Replace one of the loads with the new load.
9030  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
9031  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
9032  SDValue(NewLoad.getNode(), 1));
9033 
9034  // Remove the rest of the load chains.
9035  for (unsigned i = 1; i < NumElem ; ++i) {
9036  // Replace all chain users of the old load nodes with the chain of the new
9037  // load node.
9038  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
9039  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
9040  }
9041 
9042  // Replace the first store with the new store.
9043  CombineTo(EarliestOp, NewStore);
9044  // Erase all other stores.
9045  for (unsigned i = 0; i < NumElem ; ++i) {
9046  // Remove all Store nodes.
9047  if (StoreNodes[i].MemNode == EarliestOp)
9048  continue;
9049  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
9050  DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
9051  removeFromWorkList(St);
9052  DAG.DeleteNode(St);
9053  }
9054 
9055  return true;
9056 }
9057 
/// visitSTORE - Try the store combines below in order; the first one that
/// applies determines the result.
///
/// \param N  The store node; it is cast to StoreSDNode.
/// \returns A replacement value from one of the combines, SDValue(N, 0) when
/// a helper reported that it changed things itself, or otherwise whatever
/// ReduceLoadOpStoreWidth(N) returns.
9058 SDValue DAGCombiner::visitSTORE(SDNode *N) {
9059  StoreSDNode *ST = cast<StoreSDNode>(N);
9060  SDValue Chain = ST->getChain();
9061  SDValue Value = ST->getValue();
9062  SDValue Ptr = ST->getBasePtr();
9063 
9064  // If this is a store of a bit convert, store the input value if the
9065  // resultant store does not need a higher alignment than the original.
9066  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
9067  ST->isUnindexed()) {
9068  unsigned OrigAlign = ST->getAlignment();
9069  EVT SVT = Value.getOperand(0).getValueType();
9070  unsigned Align = TLI.getDataLayout()->
9071  getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
9072  if (Align <= OrigAlign &&
9073  ((!LegalOperations && !ST->isVolatile()) ||
 // NOTE(review): listing line 9074 (the rest of this condition) is missing.
9075  return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
9076  Ptr, ST->getPointerInfo(), ST->isVolatile(),
9077  ST->isNonTemporal(), OrigAlign,
9078  ST->getTBAAInfo());
9079  }
9080 
9081  // Turn 'store undef, Ptr' -> nothing.
9082  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
9083  return Chain;
9084 
9085  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
9086  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
9087  // NOTE: If the original store is volatile, this transform must not increase
9088  // the number of stores. For example, on x86-32 an f64 can be stored in one
9089  // processor operation but an i64 (which is not legal) requires two. So the
9090  // transform should not be done in this case.
9091  if (Value.getOpcode() != ISD::TargetConstantFP) {
9092  SDValue Tmp;
9093  switch (CFP->getSimpleValueType(0).SimpleTy) {
9094  default: llvm_unreachable("Unknown FP type");
9095  case MVT::f16: // We don't do this for these yet.
9096  case MVT::f80:
9097  case MVT::f128:
9098  case MVT::ppcf128:
9099  break;
9100  case MVT::f32:
9101  if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
 // NOTE(review): listing line 9102 (the rest of this condition) is missing.
9103  Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
9104  bitcastToAPInt().getZExtValue(), MVT::i32);
9105  return DAG.getStore(Chain, SDLoc(N), Tmp,
9106  Ptr, ST->getMemOperand());
9107  }
9108  break;
9109  case MVT::f64:
9110  if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
9111  !ST->isVolatile()) ||
 // NOTE(review): listing line 9112 (the rest of this condition) is missing.
9113  Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
9114  getZExtValue(), MVT::i64);
9115  return DAG.getStore(Chain, SDLoc(N), Tmp,
9116  Ptr, ST->getMemOperand());
9117  }
9118 
9119  if (!ST->isVolatile() &&
 // NOTE(review): listing line 9120 (the rest of this condition) is missing.
9121  // Many FP stores are not made apparent until after legalize, e.g. for
9122  // argument passing. Since this is so common, custom legalize the
9123  // 64-bit integer store into two 32-bit stores.
9124  uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
9125  SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
9126  SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
 // On big-endian targets the high half goes to the lower address.
9127  if (TLI.isBigEndian()) std::swap(Lo, Hi);
9128 
9129  unsigned Alignment = ST->getAlignment();
9130  bool isVolatile = ST->isVolatile();
9131  bool isNonTemporal = ST->isNonTemporal();
9132  const MDNode *TBAAInfo = ST->getTBAAInfo();
9133 
 // First half at the original address with the original alignment.
9134  SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
9135  Ptr, ST->getPointerInfo(),
9136  isVolatile, isNonTemporal,
9137  ST->getAlignment(), TBAAInfo);
 // Second half at Ptr + 4; its alignment is MinAlign(original, 4).
9138  Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
9139  DAG.getConstant(4, Ptr.getValueType()));
9140  Alignment = MinAlign(Alignment, 4U);
9141  SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
9142  Ptr, ST->getPointerInfo().getWithOffset(4),
9143  isVolatile, isNonTemporal,
9144  Alignment, TBAAInfo);
 // Both halves hang off the same incoming chain; join them with a
 // TokenFactor.
9145  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
9146  St0, St1);
9147  }
9148 
9149  break;
9150  }
9151  }
9152  }
9153 
9154  // Try to infer better alignment information than the store already has.
9155  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
9156  if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
9157  if (Align > ST->getAlignment())
9158  return DAG.getTruncStore(Chain, SDLoc(N), Value,
9159  Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
9160  ST->isVolatile(), ST->isNonTemporal(), Align,
9161  ST->getTBAAInfo());
9162  }
9163  }
9164 
9165  // Try transforming a pair floating point load / store ops to integer
9166  // load / store ops.
9167  SDValue NewST = TransformFPLoadStorePair(N);
9168  if (NewST.getNode())
9169  return NewST;
9170 
 // An explicit -combiner-alias-analysis occurrence on the command line takes
 // precedence over the fallback value.
9171  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
 // NOTE(review): listing line 9172 (the fallback value of this conditional
 // expression) is missing.
9173  if (UseAA) {
9174  // Walk up chain skipping non-aliasing memory nodes.
9175  SDValue BetterChain = FindBetterChain(N, Chain);
9176 
9177  // If there is a better chain.
9178  if (Chain != BetterChain) {
9179  SDValue ReplStore;
9180 
9181  // Replace the chain to avoid dependency.
9182  if (ST->isTruncatingStore()) {
9183  ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
9184  ST->getMemoryVT(), ST->getMemOperand());
9185  } else {
9186  ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
9187  ST->getMemOperand());
9188  }
9189 
9190  // Create token to keep both nodes around.
9191  SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
9192  MVT::Other, Chain, ReplStore);
9193 
9194  // Make sure the new and old chains are cleaned up.
9195  AddToWorkList(Token.getNode());
9196 
9197  // Don't add users to work list.
9198  return CombineTo(N, Token, false);
9199  }
9200  }
9201 
9202  // Try transforming N to an indexed store.
9203  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9204  return SDValue(N, 0);
9205 
9206  // FIXME: is there such a thing as a truncating indexed store?
9207  if (ST->isTruncatingStore() && ST->isUnindexed() &&
9208  Value.getValueType().isInteger()) {
9209  // See if we can simplify the input to this truncstore with knowledge that
9210  // only the low bits are being used. For example:
9211  // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
9212  SDValue Shorter =
9213  GetDemandedBits(Value,
 // NOTE(review): listing lines 9214-9216 (the demanded-bits mask argument) are
 // missing.
9217  AddToWorkList(Value.getNode());
9218  if (Shorter.getNode())
9219  return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
9220  Ptr, ST->getMemoryVT(), ST->getMemOperand());
9221 
9222  // Otherwise, see if we can simplify the operation with
9223  // SimplifyDemandedBits, which only works if the value has a single use.
9224  if (SimplifyDemandedBits(Value,
 // NOTE(review): listing lines 9225-9227 (the mask argument and the end of
 // this condition) are missing.
9228  return SDValue(N, 0);
9229  }
9230 
9231  // If this is a load followed by a store to the same location, then the store
9232  // is dead/noop.
9233  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
9234  if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
9235  ST->isUnindexed() && !ST->isVolatile() &&
9236  // There can't be any side effects between the load and store, such as
9237  // a call or store.
9238  Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
9239  // The store is dead, remove it.
9240  return Chain;
9241  }
9242  }
9243 
9244  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
9245  // truncating store. We can do this even if this is already a truncstore.
9246  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
9247  && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
9248  TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
9249  ST->getMemoryVT())) {
9250  return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
9251  Ptr, ST->getMemoryVT(), ST->getMemOperand());
9252  }
9253 
9254  // Only perform this optimization before the types are legal, because we
9255  // don't want to perform this optimization on every DAGCombine invocation.
9256  if (!LegalTypes) {
9257  bool EverChanged = false;
9258 
9259  do {
9260  // There can be multiple store sequences on the same chain.
9261  // Keep trying to merge store sequences until we are unable to do so
9262  // or until we merge the last store on the chain.
9263  bool Changed = MergeConsecutiveStores(ST);
9264  EverChanged |= Changed;
9265  if (!Changed) break;
 // Stop once ST itself has been deleted by a merge.
9266  } while (ST->getOpcode() != ISD::DELETED_NODE);
9267 
9268  if (EverChanged)
9269  return SDValue(N, 0);
9270  }
9271 
9272  return ReduceLoadOpStoreWidth(N);
9273 }
9274 
/// visitINSERT_VECTOR_ELT - Fold an insertion at a constant index into a
/// single-use BUILD_VECTOR (or into UNDEF) into one new BUILD_VECTOR.
///
/// \param N  The node; operands are (vector, inserted value, element index).
/// \returns The input vector when the inserted value is UNDEF, a new
/// BUILD_VECTOR when the fold applies, or an empty SDValue otherwise.
9275 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
9276  SDValue InVec = N->getOperand(0);
9277  SDValue InVal = N->getOperand(1);
9278  SDValue EltNo = N->getOperand(2);
9279  SDLoc dl(N);
9280 
9281  // If the inserted element is an UNDEF, just use the input vector.
9282  if (InVal.getOpcode() == ISD::UNDEF)
9283  return InVec;
9284 
9285  EVT VT = InVec.getValueType();
9286 
9287  // If we can't generate a legal BUILD_VECTOR, exit
9288  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
9289  return SDValue();
9290 
9291  // Check that we know which element is being inserted
9292  if (!isa<ConstantSDNode>(EltNo))
9293  return SDValue();
9294  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9295 
9296  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
9297  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
9298  // vector elements.
 // NOTE(review): listing line 9299 is missing; presumably it declares the Ops
 // container used below.
9300  // Do not combine these two vectors if the output vector will not replace
9301  // the input vector.
9302  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
9303  Ops.append(InVec.getNode()->op_begin(),
9304  InVec.getNode()->op_end());
9305  } else if (InVec.getOpcode() == ISD::UNDEF) {
 // Start from a vector of UNDEFs that have the inserted value's type.
9306  unsigned NElts = VT.getVectorNumElements();
9307  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
9308  } else {
9309  return SDValue();
9310  }
9311 
9312  // Insert the element
 // An out-of-range index leaves Ops untouched.
9313  if (Elt < Ops.size()) {
9314  // All the operands of BUILD_VECTOR must have the same type;
9315  // we enforce that here.
9316  EVT OpVT = Ops[0].getValueType();
9317  if (InVal.getValueType() != OpVT)
9318  InVal = OpVT.bitsGT(InVal.getValueType()) ?
9319  DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
9320  DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
9321  Ops[Elt] = InVal;
9322  }
9323 
9324  // Return the new vector
9325  return DAG.getNode(ISD::BUILD_VECTOR, dl,
9326  VT, &Ops[0], Ops.size());
9327 }
9328 
9329 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
9330  // (vextract (scalar_to_vector val, 0) -> val
9331  SDValue InVec = N->getOperand(0);
9332  EVT VT = InVec.getValueType();
9333  EVT NVT = N->getValueType(0);
9334 
9335  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
9336  // Check if the result type doesn't match the inserted element type. A
9337  // SCALAR_TO_VECTOR may truncate the inserted element and the
9338  // EXTRACT_VECTOR_ELT may widen the extracted vector.
9339  SDValue InOp = InVec.getOperand(0);
9340  if (InOp.getValueType() != NVT) {
9341  assert(InOp.getValueType().isInteger() && NVT.isInteger());
9342  return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
9343  }
9344  return InOp;
9345  }
9346 
9347  SDValue EltNo = N->getOperand(1);
9348  bool ConstEltNo = isa<ConstantSDNode>(EltNo);
9349 
9350  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
9351  // We only perform this optimization before the op legalization phase because
9352  // we may introduce new vector instructions which are not backed by TD
9353  // patterns. For example on AVX, extracting elements from a wide vector
9354  // without using extract_subvector.
9355  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
9356  && ConstEltNo && !LegalOperations) {
9357  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9358  int NumElem = VT.getVectorNumElements();
9359  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
9360  // Find the new index to extract from.
9361  int OrigElt = SVOp->getMaskElt(Elt);
9362 
9363  // Extracting an undef index is undef.
9364  if (OrigElt == -1)
9365  return DAG.getUNDEF(NVT);
9366 
9367  // Select the right vector half to extract from.
9368  if (OrigElt < NumElem) {
9369  InVec = InVec->getOperand(0);
9370  } else {
9371  InVec = InVec->getOperand(1);
9372  OrigElt -= NumElem;
9373  }
9374 
9375  EVT IndexTy = TLI.getVectorIdxTy();
9376  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
9377  InVec, DAG.getConstant(OrigElt, IndexTy));
9378  }
9379 
9380  // Perform only after legalization to ensure build_vector / vector_shuffle
9381  // optimizations have already been done.
9382  if (!LegalOperations) return SDValue();
9383 
9384  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
9385  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
9386  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
9387 
9388  if (ConstEltNo) {
9389  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9390  bool NewLoad = false;
9391  bool BCNumEltsChanged = false;
9392  EVT ExtVT = VT.getVectorElementType();
9393  EVT LVT = ExtVT;
9394 
9395  // If the result of load has to be truncated, then it's not necessarily
9396  // profitable.
9397  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
9398  return SDValue();
9399 
9400  if (InVec.getOpcode() == ISD::BITCAST) {
9401  // Don't duplicate a load with other uses.
9402  if (!InVec.hasOneUse())
9403  return SDValue();
9404 
9405  EVT BCVT = InVec.getOperand(0).getValueType();
9406  if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
9407  return SDValue();
9408  if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
9409  BCNumEltsChanged = true;
9410  InVec = InVec.getOperand(0);
9411  ExtVT = BCVT.getVectorElementType();
9412  NewLoad = true;
9413  }
9414 
9415  LoadSDNode *LN0 = NULL;
9416  const ShuffleVectorSDNode *SVN = NULL;
9417  if (ISD::isNormalLoad(InVec.getNode())) {
9418  LN0 = cast<LoadSDNode>(InVec);
9419  } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
9420  InVec.getOperand(0).getValueType() == ExtVT &&
9421  ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
9422  // Don't duplicate a load with other uses.
9423  if (!InVec.hasOneUse())
9424  return SDValue();
9425 
9426  LN0 = cast<LoadSDNode>(InVec.getOperand(0));
9427  } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
9428  // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
9429  // =>
9430  // (load $addr+1*size)
9431 
9432  // Don't duplicate a load with other uses.
9433  if (!InVec.hasOneUse())
9434  return SDValue();
9435 
9436  // If the bit convert changed the number of elements, it is unsafe
9437  // to examine the mask.
9438  if (BCNumEltsChanged)
9439  return SDValue();
9440 
9441  // Select the input vector, guarding against out of range extract vector.
9442  unsigned NumElems = VT.getVectorNumElements();
9443  int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
9444  InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
9445 
9446  if (InVec.getOpcode() == ISD::BITCAST) {
9447  // Don't duplicate a load with other uses.
9448  if (!InVec.hasOneUse())
9449  return SDValue();
9450 
9451  InVec = InVec.getOperand(0);
9452  }
9453  if (ISD::isNormalLoad(InVec.getNode())) {
9454  LN0 = cast<LoadSDNode>(InVec);
9455  Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
9456  }
9457  }
9458 
9459  // Make sure we found a non-volatile load and the extractelement is
9460  // the only use.
9461  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
9462  return SDValue();
9463 
9464  // If Idx was -1 above, Elt is going to be -1, so just return undef.
9465  if (Elt == -1)
9466  return DAG.getUNDEF(LVT);
9467 
9468  unsigned Align = LN0->getAlignment();
9469  if (NewLoad) {
9470  // Check the resultant load doesn't need a higher alignment than the
9471  // original load.
9472  unsigned NewAlign =
9473  TLI.getDataLayout()
9474  ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
9475 
9476  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
9477  return SDValue();
9478 
9479  Align = NewAlign;
9480  }
9481 
9482  SDValue NewPtr = LN0->getBasePtr();
9483  unsigned PtrOff = 0;
9484 
9485  if (Elt) {
9486  PtrOff = LVT.getSizeInBits() * Elt / 8;
9487  EVT PtrType = NewPtr.getValueType();
9488  if (TLI.isBigEndian())
9489  PtrOff = VT.getSizeInBits() / 8 - PtrOff;
9490  NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
9491  DAG.getConstant(PtrOff, PtrType));
9492  }
9493 
9494  // The replacement we need to do here is a little tricky: we need to
9495  // replace an extractelement of a load with a load.
9496  // Use ReplaceAllUsesOfValuesWith to do the replacement.
9497  // Note that this replacement assumes that the extractvalue is the only
9498  // use of the load; that's okay because we don't want to perform this
9499  // transformation in other cases anyway.
9500  SDValue Load;
9501  SDValue Chain;
9502  if (NVT.bitsGT(LVT)) {
9503  // If the result type of vextract is wider than the load, then issue an
9504  // extending load instead.
9505  ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
9507  Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
9508  NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
9509  LVT, LN0->isVolatile(), LN0->isNonTemporal(),
9510  Align, LN0->getTBAAInfo());
9511  Chain = Load.getValue(1);
9512  } else {
9513  Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
9514  LN0->getPointerInfo().getWithOffset(PtrOff),
9515  LN0->isVolatile(), LN0->isNonTemporal(),
9516  LN0->isInvariant(), Align, LN0->getTBAAInfo());
9517  Chain = Load.getValue(1);
9518  if (NVT.bitsLT(LVT))
9519  Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
9520  else
9521  Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
9522  }
9523  WorkListRemover DeadNodes(*this);
9524  SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
9525  SDValue To[] = { Load, Chain };
9526  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
9527  // Since we're explcitly calling ReplaceAllUses, add the new node to the
9528  // worklist explicitly as well.
9529  AddToWorkList(Load.getNode());
9530  AddUsersToWorkList(Load.getNode()); // Add users too
9531  // Make sure to revisit this node to clean it up; it will usually be dead.
9532  AddToWorkList(N);
9533  return SDValue(N, 0);
9534  }
9535 
9536  return SDValue();
9537 }
9538 
9539 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
//
// Every non-undef operand of the BUILD_VECTOR must be an ANY_EXTEND or
// ZERO_EXTEND from one common source type. The result is a BUILD_VECTOR of
// the narrow source type, padded with undef (all any-extends) or zero filler
// elements, and bitcast back to the original vector type. Returns an empty
// SDValue when the pattern does not match or the narrow vector type is not
// legal.
9540 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
9541  // We perform this optimization post type-legalization because
9542  // the type-legalizer often scalarizes integer-promoted vectors.
9543  // Performing this optimization before may create bit-casts which
9544  // will be type-legalized to complex code sequences.
9545  // We perform this optimization only before the operation legalizer because we
9546  // may introduce illegal operations.
 // NOTE(review): this listing skips source line 9547, which should hold the
 // condition guarding the return below; as shown the return looks
 // unconditional. Restore that line from the upstream file.
9548  return SDValue();
9549
9550  unsigned NumInScalars = N->getNumOperands();
9551  SDLoc dl(N);
9552  EVT VT = N->getValueType(0);
9553
9554  // Check to see if this is a BUILD_VECTOR of a bunch of values
9555  // which come from any_extend or zero_extend nodes. If so, we can create
9556  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
9557  // optimizations. We do not handle sign-extend because we can't fill the sign
9558  // using shuffles.
 // MVT::Other doubles as the "no source type yet" / "pattern rejected" marker.
9559  EVT SourceType = MVT::Other;
9560  bool AllAnyExt = true;
9561
9562  for (unsigned i = 0; i != NumInScalars; ++i) {
9563  SDValue In = N->getOperand(i);
9564  // Ignore undef inputs.
9565  if (In.getOpcode() == ISD::UNDEF) continue;
9566
9567  bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
9568  bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
9569
9570  // Abort if the element is not an extension.
9571  if (!ZeroExt && !AnyExt) {
9572  SourceType = MVT::Other;
9573  break;
9574  }
9575
9576  // The input is a ZeroExt or AnyExt. Check the original type.
9577  EVT InTy = In.getOperand(0).getValueType();
9578
9579  // Check that all of the widened source types are the same.
9580  if (SourceType == MVT::Other)
9581  // First time.
9582  SourceType = InTy;
9583  else if (InTy != SourceType) {
9584  // Multiple incoming types. Abort.
9585  SourceType = MVT::Other;
9586  break;
9587  }
9588
9589  // Check if all of the extends are ANY_EXTENDs.
9590  AllAnyExt &= AnyExt;
9591  }
9592
9593  // In order to have valid types, all of the inputs must be extended from the
9594  // same source type and all of the inputs must be any or zero extend.
9595  // Scalar sizes must be a power of two.
9596  EVT OutScalarTy = VT.getScalarType();
9597  bool ValidTypes = SourceType != MVT::Other &&
9598  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
9599  isPowerOf2_32(SourceType.getSizeInBits());
9600
9601  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
9602  // turn into a single shuffle instruction.
9603  if (!ValidTypes)
9604  return SDValue();
9605
9606  bool isLE = TLI.isLittleEndian();
9607  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
9608  assert(ElemRatio > 1 && "Invalid element size ratio");
 // Any-extended high parts may be anything (undef); zero-extended ones must
 // be zero.
9609  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
9610  DAG.getConstant(0, SourceType);
9611
9612  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
9613  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
9614
9615  // Populate the new build_vector
9616  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9617  SDValue Cast = N->getOperand(i);
9618  assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
9619  Cast.getOpcode() == ISD::ZERO_EXTEND ||
9620  Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
9621  SDValue In;
9622  if (Cast.getOpcode() == ISD::UNDEF)
9623  In = DAG.getUNDEF(SourceType);
9624  else
9625  In = Cast->getOperand(0);
 // The narrow value goes in the first slot of its group on little-endian
 // targets and in the last slot otherwise.
9626  unsigned Index = isLE ? (i * ElemRatio) :
9627  (i * ElemRatio + (ElemRatio - 1));
9628
9629  assert(Index < Ops.size() && "Invalid index");
9630  Ops[Index] = In;
9631  }
9632
9633  // The type of the new BUILD_VECTOR node.
9634  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
9635  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
9636  "Invalid vector size");
9637  // Check if the new vector type is legal.
9638  if (!isTypeLegal(VecVT)) return SDValue();
9639
9640  // Make the new BUILD_VECTOR.
9641  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
9642
9643  // The new BUILD_VECTOR node has the potential to be further optimized.
9644  AddToWorkList(BV.getNode());
9645  // Bitcast to the desired type.
9646  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
9647 }
9648 
/// reduceBuildVecConvertToConvertBuildVec - Turn a BUILD_VECTOR whose defined
/// operands are all the same integer-to-float conversion (all UINT_TO_FP or
/// all SINT_TO_FP) from one common source type into a single vector
/// conversion applied to a BUILD_VECTOR of the unconverted sources.
///
/// Returns an empty SDValue when the operands do not match, when fewer than
/// two operands are defined, or when the vector conversion is neither legal
/// nor custom for the source vector type.
9649 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
9650  EVT VT = N->getValueType(0);
9651
9652  unsigned NumInScalars = N->getNumOperands();
9653  SDLoc dl(N);
9654
 // MVT::Other and ISD::DELETED_NODE act as "not determined yet" markers.
9655  EVT SrcVT = MVT::Other;
9656  unsigned Opcode = ISD::DELETED_NODE;
9657  unsigned NumDefs = 0;
9658
9659  for (unsigned i = 0; i != NumInScalars; ++i) {
9660  SDValue In = N->getOperand(i);
9661  unsigned Opc = In.getOpcode();
9662
9663  if (Opc == ISD::UNDEF)
9664  continue;
9665
9666  // If all scalar values are floats and converted from integers.
9667  if (Opcode == ISD::DELETED_NODE &&
9668  (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
9669  Opcode = Opc;
9670  }
9671
 // Rejects both a non-conversion operand and a conversion kind that differs
 // from the first one seen.
9672  if (Opc != Opcode)
9673  return SDValue();
9674
9675  EVT InVT = In.getOperand(0).getValueType();
9676
9677  // If all scalar values are typed differently, bail out. It's chosen to
9678  // simplify BUILD_VECTOR of integer types.
9679  if (SrcVT == MVT::Other)
9680  SrcVT = InVT;
9681  if (SrcVT != InVT)
9682  return SDValue();
9683  NumDefs++;
9684  }
9685
9686  // If the vector has just one element defined, it's not worth to fold it into
9687  // a vectorized one.
9688  if (NumDefs < 2)
9689  return SDValue();
9690
9691  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
9692  && "Should only handle conversion from integer to float.");
9693  assert(SrcVT != MVT::Other && "Cannot determine source type!");
9694
9695  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
9696
9697  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
9698  return SDValue();
9699
 // NOTE(review): this listing skips source line 9700, and `Opnds` is used
 // below without a visible declaration; restore that line from upstream.
9701  for (unsigned i = 0; i != NumInScalars; ++i) {
9702  SDValue In = N->getOperand(i);
9703
9704  if (In.getOpcode() == ISD::UNDEF)
9705  Opnds.push_back(DAG.getUNDEF(SrcVT));
9706  else
9707  Opnds.push_back(In.getOperand(0));
9708  }
9709  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
9710  &Opnds[0], Opnds.size());
9711  AddToWorkList(BV.getNode());
9712
9713  return DAG.getNode(Opcode, dl, VT, BV);
9714 }
9715 
9716 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
9717  unsigned NumInScalars = N->getNumOperands();
9718  SDLoc dl(N);
9719  EVT VT = N->getValueType(0);
9720 
9721  // A vector built entirely of undefs is undef.
9722  if (ISD::allOperandsUndef(N))
9723  return DAG.getUNDEF(VT);
9724 
9725  SDValue V = reduceBuildVecExtToExtBuildVec(N);
9726  if (V.getNode())
9727  return V;
9728 
9729  V = reduceBuildVecConvertToConvertBuildVec(N);
9730  if (V.getNode())
9731  return V;
9732 
9733  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
9734  // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
9735  // at most two distinct vectors, turn this into a shuffle node.
9736 
9737  // May only combine to shuffle after legalize if shuffle is legal.
9738  if (LegalOperations &&
9740  return SDValue();
9741 
9742  SDValue VecIn1, VecIn2;
9743  for (unsigned i = 0; i != NumInScalars; ++i) {
9744  // Ignore undef inputs.
9745  if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
9746 
9747  // If this input is something other than a EXTRACT_VECTOR_ELT with a
9748  // constant index, bail out.
9749  if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
9750  !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
9751  VecIn1 = VecIn2 = SDValue(0, 0);
9752  break;
9753  }
9754 
9755  // We allow up to two distinct input vectors.
9756  SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
9757  if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
9758  continue;
9759 
9760  if (VecIn1.getNode() == 0) {
9761  VecIn1 = ExtractedFromVec;
9762  } else if (VecIn2.getNode() == 0) {
9763  VecIn2 = ExtractedFromVec;
9764  } else {
9765  // Too many inputs.
9766  VecIn1 = VecIn2 = SDValue(0, 0);
9767  break;
9768  }
9769  }
9770 
9771  // If everything is good, we can make a shuffle operation.
9772  if (VecIn1.getNode()) {
9773  SmallVector<int, 8> Mask;
9774  for (unsigned i = 0; i != NumInScalars; ++i) {
9775  if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
9776  Mask.push_back(-1);
9777  continue;
9778  }
9779 
9780  // If extracting from the first vector, just use the index directly.
9781  SDValue Extract = N->getOperand(i);
9782  SDValue ExtVal = Extract.getOperand(1);
9783  if (Extract.getOperand(0) == VecIn1) {
9784  unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
9785  if (ExtIndex > VT.getVectorNumElements())
9786  return SDValue();
9787 
9788  Mask.push_back(ExtIndex);
9789  continue;
9790  }
9791 
9792  // Otherwise, use InIdx + VecSize
9793  unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
9794  Mask.push_back(Idx+NumInScalars);
9795  }
9796 
9797  // We can't generate a shuffle node with mismatched input and output types.
9798  // Attempt to transform a single input vector to the correct type.
9799  if ((VT != VecIn1.getValueType())) {
9800  // We don't support shuffeling between TWO values of different types.
9801  if (VecIn2.getNode() != 0)
9802  return SDValue();
9803 
9804  // We only support widening of vectors which are half the size of the
9805  // output registers. For example XMM->YMM widening on X86 with AVX.
9806  if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
9807  return SDValue();
9808 
9809  // If the input vector type has a different base type to the output
9810  // vector type, bail out.
9811  if (VecIn1.getValueType().getVectorElementType() !=
9812  VT.getVectorElementType())
9813  return SDValue();
9814 
9815  // Widen the input vector by adding undef values.
9816  VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
9817  VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
9818  }
9819 
9820  // If VecIn2 is unused then change it to undef.
9821  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
9822 
9823  // Check that we were able to transform all incoming values to the same
9824  // type.
9825  if (VecIn2.getValueType() != VecIn1.getValueType() ||
9826  VecIn1.getValueType() != VT)
9827  return SDValue();
9828 
9829  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
9830  if (!isTypeLegal(VT))
9831  return SDValue();
9832 
9833  // Return the new VECTOR_SHUFFLE node.
9834  SDValue Ops[2];
9835  Ops[0] = VecIn1;
9836  Ops[1] = VecIn2;
9837  return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
9838  }
9839 
9840  return SDValue();
9841 }
9842 
/// visitCONCAT_VECTORS - Combine a CONCAT_VECTORS node:
///  - a single-operand concat is its operand;
///  - a concat of only undefs is UNDEF;
///  - concat(bitcast(scalar), undef) becomes a bitcast of SCALAR_TO_VECTOR
///    when the involved types are legal;
///  - a concat of EXTRACT_SUBVECTORs (and undefs) that reads one source
///    vector of the result type at the identity positions is that source.
///
/// Returns the replacement value, or an empty SDValue when nothing applies.
9843 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
9844  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
9845  // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
9846  // inputs come from at most two distinct vectors, turn this into a shuffle
9847  // node.
9848
9849  // If we only have one input vector, we don't need to do any concatenation.
9850  if (N->getNumOperands() == 1)
9851  return N->getOperand(0);
9852
9853  // Check if all of the operands are undefs.
9854  EVT VT = N->getValueType(0);
9855  if (ISD::allOperandsUndef(N))
9856  return DAG.getUNDEF(VT);
9857
9858  // Optimize concat_vectors where one of the vectors is undef.
9859  if (N->getNumOperands() == 2 &&
9860  N->getOperand(1)->getOpcode() == ISD::UNDEF) {
9861  SDValue In = N->getOperand(0);
9862  assert(In->getValueType(0).isVector() && "Must concat vectors");
9863
9864  // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
9865  if (In->getOpcode() == ISD::BITCAST &&
9866  !In->getOperand(0)->getValueType(0).isVector()) {
9867  SDValue Scalar = In->getOperand(0);
9868  EVT SclTy = Scalar->getValueType(0);
9869
9870  if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
9871  return SDValue();
9872
 // Vector of the scalar's type with the same total width as the result.
9873  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
9874  VT.getSizeInBits() / SclTy.getSizeInBits());
9875  if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
9876  return SDValue();
9877
9878  SDLoc dl = SDLoc(N);
9879  SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
9880  return DAG.getNode(ISD::BITCAST, dl, VT, Res);
9881  }
9882  }
9883
9884  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
9885  // nodes often generate nop CONCAT_VECTOR nodes.
9886  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
9887  // place the incoming vectors at the exact same location.
9888  SDValue SingleSource = SDValue();
9889  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
9890
9891  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9892  SDValue Op = N->getOperand(i);
9893
9894  if (Op.getOpcode() == ISD::UNDEF)
9895  continue;
9896
9897  // Check if this is the identity extract:
9898  if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
9899  return SDValue();
9900
9901  // Find the single incoming vector for the extract_subvector.
9902  if (SingleSource.getNode()) {
9903  if (Op.getOperand(0) != SingleSource)
9904  return SDValue();
9905  } else {
9906  SingleSource = Op.getOperand(0);
9907
9908  // Check the source type is the same as the type of the result.
9909  // If not, this concat may extend the vector, so we can not
9910  // optimize it away.
9911  if (SingleSource.getValueType() != N->getValueType(0))
9912  return SDValue();
9913  }
9914
 // Operand i of the concat must start at element i * PartNumElem of the
 // source.
9915  unsigned IdentityIndex = i * PartNumElem;
 // NOTE(review): this listing skips source line 9916, and `CS` is used below
 // without a visible declaration; restore that line from upstream.
9917  // The extract index must be constant.
9918  if (!CS)
9919  return SDValue();
9920
9921  // Check that we are reading from the identity index.
9922  if (CS->getZExtValue() != IdentityIndex)
9923  return SDValue();
9924  }
9925
9926  if (SingleSource.getNode())
9927  return SingleSource;
9928
9929  return SDValue();
9930 }
9931 
/// visitEXTRACT_SUBVECTOR - Combine an EXTRACT_SUBVECTOR node:
///  - an extract from a CONCAT_VECTORS whose operands have the result type is
///    the concat operand at position Idx / NumElems;
///  - an extract from an INSERT_SUBVECTOR (optionally behind a BITCAST), where
///    the inserted and extracted types have equal size and are half the size
///    of the big vector, is either the inserted subvector or an extract from
///    the original big vector.
///
/// Returns the replacement value, or an empty SDValue when nothing applies.
9932 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
9933  EVT NVT = N->getValueType(0);
9934  SDValue V = N->getOperand(0);
9935
9936  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
9937  // Combine:
9938  // (extract_subvec (concat V1, V2, ...), i)
9939  // Into:
9940  // Vi if possible
9941  // Only operand 0 is checked as 'concat' assumes all inputs of the same
9942  // type.
9943  if (V->getOperand(0).getValueType() != NVT)
9944  return SDValue();
 // The result is dereferenced unconditionally, so use cast<> (which asserts
 // on a type mismatch) rather than dyn_cast<> (which would yield a null
 // pointer to dereference).
9945  unsigned Idx = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
9946  unsigned NumElems = NVT.getVectorNumElements();
9947  assert((Idx % NumElems) == 0 &&
9948  "IDX in concat is not a multiple of the result vector length.");
9949  return V->getOperand(Idx / NumElems);
9950  }
9951
9952  // Skip bitcasting
9953  if (V->getOpcode() == ISD::BITCAST)
9954  V = V.getOperand(0);
9955
9956  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
9957  SDLoc dl(N);
9958  // Handle only simple case where vector being inserted and vector
9959  // being extracted are of same type, and are half size of larger vectors.
9960  EVT BigVT = V->getOperand(0).getValueType();
9961  EVT SmallVT = V->getOperand(1).getValueType();
9962  if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
9963  return SDValue();
9964
9965  // Only handle cases where both indexes are constants with the same type.
 // NOTE(review): this listing skips source lines 9966-9967, and `InsIdx` /
 // `ExtIdx` are used below without visible declarations; restore those
 // lines from the upstream file.
9968
9969  if (InsIdx && ExtIdx &&
9970  InsIdx->getValueType(0).getSizeInBits() <= 64 &&
9971  ExtIdx->getValueType(0).getSizeInBits() <= 64) {
9972  // Combine:
9973  // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
9974  // Into:
9975  // indices are equal or bit offsets are equal => V2
9976  // otherwise => (extract_subvec V1, ExtIdx)
9977  if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
9978  ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
9979  return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
9980  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
9981  DAG.getNode(ISD::BITCAST, dl,
9982  N->getOperand(0).getValueType(),
9983  V->getOperand(0)), N->getOperand(1));
9984  }
9985  }
9986
9987  return SDValue();
9988 }
9989 
9990 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
//
// The shuffle mask is examined in groups of one concat-operand width. Each
// group must be either entirely undef (an undef operand is emitted) or an
// exact in-order copy of one operand of N0 or N1 (that operand is emitted).
// Any other group makes the function return an empty SDValue.
//
// NOTE(review): this listing skips source line 9991, which should hold the
// function signature; a caller elsewhere in this file invokes it as
// partitionShuffleOfConcats(N, DAG). Restore the line from upstream.
9992  EVT VT = N->getValueType(0);
9993  unsigned NumElts = VT.getVectorNumElements();
9994
9995  SDValue N0 = N->getOperand(0);
9996  SDValue N1 = N->getOperand(1);
9997  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
9998
 // NOTE(review): this listing skips source line 9999, and `Ops` is used below
 // without a visible declaration; restore that line from upstream.
10000  EVT ConcatVT = N0.getOperand(0).getValueType();
10001  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
10002  unsigned NumConcats = NumElts / NumElemsPerConcat;
10003
10004  // Look at every vector that's inserted. We're looking for exact
10005  // subvector-sized copies from a concatenated vector
10006  for (unsigned I = 0; I != NumConcats; ++I) {
10007  // Make sure we're dealing with a copy.
10008  unsigned Begin = I * NumElemsPerConcat;
10009  bool AllUndef = true, NoUndef = true;
10010  for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
10011  if (SVN->getMaskElt(J) >= 0)
10012  AllUndef = false;
10013  else
10014  NoUndef = false;
10015  }
10016
10017  if (NoUndef) {
 // The group must start on an operand boundary ...
10018  if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
10019  return SDValue();
10020
 // ... and its mask entries must be consecutive.
10021  for (unsigned J = 1; J != NumElemsPerConcat; ++J)
10022  if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
10023  return SDValue();
10024
 // Operand numbers past N0's operand count refer to N1's operands.
10025  unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
10026  if (FirstElt < N0.getNumOperands())
10027  Ops.push_back(N0.getOperand(FirstElt));
10028  else
10029  Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
10030
10031  } else if (AllUndef) {
10032  Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
10033  } else { // Mixed with general masks and undefs, can't do optimization.
10034  return SDValue();
10035  }
10036  }
10037
10038  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(),
10039  Ops.size());
10040 }
10041 
/// visitVECTOR_SHUFFLE - Canonicalize and simplify a VECTOR_SHUFFLE node:
///  - shuffle undef, undef          -> undef
///  - shuffle v, v                  -> shuffle v, undef
///  - shuffle undef, v              -> shuffle v, undef (mask commuted)
///  - mask entries that reference an undef right-hand side become undef
///  - a splat of a BUILD_VECTOR that is all undef or already uniform -> N0
///  - a shuffle of CONCAT_VECTORS is handed to partitionShuffleOfConcats
///  - a shuffle that undoes a previous single-input shuffle -> that input
///
/// Returns the replacement value, or an empty SDValue when nothing applies.
10042 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
10043  EVT VT = N->getValueType(0);
10044  unsigned NumElts = VT.getVectorNumElements();
10045
10046  SDValue N0 = N->getOperand(0);
10047  SDValue N1 = N->getOperand(1);
10048
10049  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
10050
10051  // Canonicalize shuffle undef, undef -> undef
10052  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
10053  return DAG.getUNDEF(VT);
10054
10055  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
10056
10057  // Canonicalize shuffle v, v -> v, undef
10058  if (N0 == N1) {
10059  SmallVector<int, 8> NewMask;
10060  for (unsigned i = 0; i != NumElts; ++i) {
10061  int Idx = SVN->getMaskElt(i);
 // Fold references to the second copy back onto the first.
10062  if (Idx >= (int)NumElts) Idx -= NumElts;
10063  NewMask.push_back(Idx);
10064  }
10065  return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
10066  &NewMask[0]);
10067  }
10068
10069  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
10070  if (N0.getOpcode() == ISD::UNDEF) {
10071  SmallVector<int, 8> NewMask;
10072  for (unsigned i = 0; i != NumElts; ++i) {
10073  int Idx = SVN->getMaskElt(i);
10074  if (Idx >= 0) {
10075  if (Idx >= (int)NumElts)
10076  Idx -= NumElts;
10077  else
10078  Idx = -1; // remove reference to lhs
10079  }
10080  NewMask.push_back(Idx);
10081  }
10082  return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
10083  &NewMask[0]);
10084  }
10085
10086  // Remove references to rhs if it is undef
10087  if (N1.getOpcode() == ISD::UNDEF) {
10088  bool Changed = false;
10089  SmallVector<int, 8> NewMask;
10090  for (unsigned i = 0; i != NumElts; ++i) {
10091  int Idx = SVN->getMaskElt(i);
10092  if (Idx >= (int)NumElts) {
10093  Idx = -1;
10094  Changed = true;
10095  }
10096  NewMask.push_back(Idx);
10097  }
 // Only build a new node when the mask actually changed.
10098  if (Changed)
10099  return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
10100  }
10101
10102  // If it is a splat, check if the argument vector is another splat or a
10103  // build_vector with all scalar elements the same.
10104  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
10105  SDNode *V = N0.getNode();
10106
10107  // If this is a bit convert that changes the element type of the vector but
10108  // not the number of vector elements, look through it. Be careful not to
10109  // look through conversions that change things like v4f32 to v2f64.
10110  if (V->getOpcode() == ISD::BITCAST) {
10111  SDValue ConvInput = V->getOperand(0);
10112  if (ConvInput.getValueType().isVector() &&
10113  ConvInput.getValueType().getVectorNumElements() == NumElts)
10114  V = ConvInput.getNode();
10115  }
10116
10117  if (V->getOpcode() == ISD::BUILD_VECTOR) {
10118  assert(V->getNumOperands() == NumElts &&
10119  "BUILD_VECTOR has wrong number of operands");
10120  SDValue Base;
10121  bool AllSame = true;
 // Base is the first non-undef operand, if any.
10122  for (unsigned i = 0; i != NumElts; ++i) {
10123  if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
10124  Base = V->getOperand(i);
10125  break;
10126  }
10127  }
10128  // Splat of <u, u, u, u>, return <u, u, u, u>
10129  if (!Base.getNode())
10130  return N0;
 // Every operand, undef ones included, is compared against Base, so a
 // vector mixing undef with Base does not count as uniform.
10131  for (unsigned i = 0; i != NumElts; ++i) {
10132  if (V->getOperand(i) != Base) {
10133  AllSame = false;
10134  break;
10135  }
10136  }
10137  // Splat of <x, x, x, x>, return <x, x, x, x>
10138  if (AllSame)
10139  return N0;
10140  }
10141  }
10142
 // NOTE(review): this listing skips source line 10144, which belongs to the
 // condition below; restore it from the upstream file.
10143  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
10145  (N1.getOpcode() == ISD::UNDEF ||
10146  (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10147  N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
10148  SDValue V = partitionShuffleOfConcats(N, DAG);
10149
10150  if (V.getNode())
10151  return V;
10152  }
10153
10154  // If this shuffle node is simply a swizzle of another shuffle node,
10155  // and it reverses the swizzle of the previous shuffle then we can
10156  // optimize shuffle(shuffle(x, undef), undef) -> x.
 // NOTE(review): this listing skips source line 10157, which should open the
 // `if` whose condition ends on the next line; restore it from upstream.
10158  N1.getOpcode() == ISD::UNDEF) {
10159
10160  ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
10161
10162  // Shuffle nodes can only reverse shuffles with a single non-undef value.
10163  if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
10164  return SDValue();
10165
10166  // The incoming shuffle must be of the same type as the result of the
10167  // current shuffle.
10168  assert(OtherSV->getOperand(0).getValueType() == VT &&
10169  "Shuffle types don't match");
10170
10171  for (unsigned i = 0; i != NumElts; ++i) {
10172  int Idx = SVN->getMaskElt(i);
10173  assert(Idx < (int)NumElts && "Index references undef operand");
10174  // Next, this index comes from the first value, which is the incoming
10175  // shuffle. Adopt the incoming index.
10176  if (Idx >= 0)
10177  Idx = OtherSV->getMaskElt(Idx);
10178
10179  // The combined shuffle must map each index to itself.
10180  if (Idx >= 0 && (unsigned)Idx != i)
10181  return SDValue();
10182  }
10183
10184  return OtherSV->getOperand(0);
10185  }
10186
10187  return SDValue();
10188 }
10189 
10190 /// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
10191 /// an AND to a vector_shuffle with the destination vector and a zero vector.
10192 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
10193 /// vector_shuffle V, Zero, <0, 4, 2, 4>
///
/// The right-hand operand (looked at through one BITCAST) must be a
/// BUILD_VECTOR whose elements are all constants that are either all-ones or
/// zero, and the target must accept the resulting mask via
/// isVectorClearMaskLegal. Returns an empty SDValue otherwise.
10194 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
10195  EVT VT = N->getValueType(0);
10196  SDLoc dl(N);
10197  SDValue LHS = N->getOperand(0);
10198  SDValue RHS = N->getOperand(1);
10199  if (N->getOpcode() == ISD::AND) {
10200  if (RHS.getOpcode() == ISD::BITCAST)
10201  RHS = RHS.getOperand(0);
10202  if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
10203  SmallVector<int, 8> Indices;
10204  unsigned NumElts = RHS.getNumOperands();
10205  for (unsigned i = 0; i != NumElts; ++i) {
10206  SDValue Elt = RHS.getOperand(i);
10207  if (!isa<ConstantSDNode>(Elt))
10208  return SDValue();
10209
 // All-ones keeps lane i of LHS; zero selects index NumElts, i.e. the first
 // lane of the zero vector.
10210  if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
10211  Indices.push_back(i);
10212  else if (cast<ConstantSDNode>(Elt)->isNullValue())
10213  Indices.push_back(NumElts);
10214  else
10215  return SDValue();
10216  }
10217
10218  // Let's see if the target supports this vector_shuffle.
10219  EVT RVT = RHS.getValueType();
10220  if (!TLI.isVectorClearMaskLegal(Indices, RVT))
10221  return SDValue();
10222
10223  // Return the new VECTOR_SHUFFLE node.
10224  EVT EltVT = RVT.getVectorElementType();
 // NOTE(review): this listing skips source line 10225, which should begin the
 // `ZeroOps` declaration that the next line completes; restore it from the
 // upstream file.
10226  DAG.getConstant(0, EltVT));
10227  SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
10228  RVT, &ZeroOps[0], ZeroOps.size());
 // Shuffle in the mask's vector type, then cast back to the AND's type.
10229  LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
10230  SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
10231  return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
10232  }
10233  }
10234
10235  return SDValue();
10236 }
10237 
10238 /// SimplifyVBinOp - Visit a binary vector operation, like ADD.
10239 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
10240  assert(N->getValueType(0).isVector() &&
10241  "SimplifyVBinOp only works on vectors!");
10242 
10243  SDValue LHS = N->getOperand(0);
10244  SDValue RHS = N->getOperand(1);
10245  SDValue Shuffle = XformToShuffleWithZero(N);
10246  if (Shuffle.getNode()) return Shuffle;
10247 
10248  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
10249  // this operation.
10250  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
10251  RHS.getOpcode() == ISD::BUILD_VECTOR) {
10253  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
10254  SDValue LHSOp = LHS.getOperand(i);
10255  SDValue RHSOp = RHS.getOperand(i);
10256  // If these two elements can't be folded, bail out.
10257  if ((LHSOp.getOpcode() != ISD::UNDEF &&
10258  LHSOp.getOpcode() != ISD::Constant &&
10259  LHSOp.getOpcode() != ISD::ConstantFP) ||
10260  (RHSOp.getOpcode() != ISD::UNDEF &&
10261  RHSOp.getOpcode() != ISD::Constant &&
10262  RHSOp.getOpcode() != ISD::ConstantFP))
10263  break;
10264 
10265  // Can't fold divide by zero.
10266  if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
10267  N->getOpcode() == ISD::FDIV) {
10268  if ((RHSOp.getOpcode() == ISD::Constant &&
10269  cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
10270  (RHSOp.getOpcode() == ISD::ConstantFP &&
10271  cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
10272  break;
10273  }
10274 
10275  EVT VT = LHSOp.getValueType();
10276  EVT RVT = RHSOp.getValueType();
10277  if (RVT != VT) {
10278  // Integer BUILD_VECTOR operands may have types larger than the element
10279  // size (e.g., when the element type is not legal). Prior to type
10280  // legalization, the types may not match between the two BUILD_VECTORS.
10281  // Truncate one of the operands to make them match.
10282  if (RVT.getSizeInBits() > VT.getSizeInBits()) {
10283  RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
10284  } else {
10285  LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
10286  VT = RVT;
10287  }
10288  }
10289  SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
10290  LHSOp, RHSOp);
10291  if (FoldOp.getOpcode() != ISD::UNDEF &&
10292  FoldOp.getOpcode() != ISD::Constant &&
10293  FoldOp.getOpcode() != ISD::ConstantFP)
10294  break;
10295  Ops.push_back(FoldOp);
10296  AddToWorkList(FoldOp.getNode());
10297  }
10298 
10299  if (Ops.size() == LHS.getNumOperands())
10300  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
10301  LHS.getValueType(), &Ops[0], Ops.size());
10302  }
10303 
10304  return SDValue();
10305 }
10306 
10307 /// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG.
10308 SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
10309  assert(N->getValueType(0).isVector() &&
10310  "SimplifyVUnaryOp only works on vectors!");
10311 
10312  SDValue N0 = N->getOperand(0);
10313 
10314  if (N0.getOpcode() != ISD::BUILD_VECTOR)
10315  return SDValue();
10316 
10317  // Operand is a BUILD_VECTOR node, see if we can constant fold it.
10319  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10320  SDValue Op = N0.getOperand(i);
10321  if (Op.getOpcode() != ISD::UNDEF &&
10322  Op.getOpcode() != ISD::ConstantFP)
10323  break;
10324  EVT EltVT = Op.getValueType();
10325  SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
10326  if (FoldOp.getOpcode() != ISD::UNDEF &&
10327  FoldOp.getOpcode() != ISD::ConstantFP)
10328  break;
10329  Ops.push_back(FoldOp);
10330  AddToWorkList(FoldOp.getNode());
10331  }
10332 
10333  if (Ops.size() != N0.getNumOperands())
10334  return SDValue();
10335 
10336  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
10337  N0.getValueType(), &Ops[0], Ops.size());
10338 }
10339 
10340 SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
10341  SDValue N1, SDValue N2){
10342  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
10343 
10344  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
10345  cast<CondCodeSDNode>(N0.getOperand(2))->get());
10346 
10347  // If we got a simplified select_cc node back from SimplifySelectCC, then
10348  // break it down into a new SETCC node, and a new SELECT node, and then return
10349  // the SELECT node, since we were called with a SELECT node.
10350  if (SCC.getNode()) {
10351  // Check to see if we got a select_cc back (to turn into setcc/select).
10352  // Otherwise, just return whatever node we got back, like fabs.
10353  if (SCC.getOpcode() == ISD::SELECT_CC) {
10354  SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
10355  N0.getValueType(),
10356  SCC.getOperand(0), SCC.getOperand(1),
10357  SCC.getOperand(4));
10358  AddToWorkList(SETCC.getNode());
10359  return DAG.getSelect(SDLoc(SCC), SCC.getValueType(),
10360  SCC.getOperand(2), SCC.getOperand(3), SETCC);
10361  }
10362 
10363  return SCC;
10364  }
10365  return SDValue();
10366 }
10367 
10368 /// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
10369 /// are the two values being selected between, see if we can simplify the
10370 /// select. Callers of this should assume that TheSelect is deleted if this
10371 /// returns true. As such, they should return the appropriate thing (e.g. the
10372 /// node) back to the top-level of the DAG combiner loop to avoid it being
10373 /// looked at.
10374 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
10375  SDValue RHS) {
10376 
10377  // Cannot simplify select with vector condition
10378  if (TheSelect->getOperand(0).getValueType().isVector()) return false;
10379 
10380  // If this is a select from two identical things, try to pull the operation
10381  // through the select.
10382  if (LHS.getOpcode() != RHS.getOpcode() ||
10383  !LHS.hasOneUse() || !RHS.hasOneUse())
10384  return false;
10385 
10386  // If this is a load and the token chain is identical, replace the select
10387  // of two loads with a load through a select of the address to load from.
10388  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
10389  // constants have been dropped into the constant pool.
10390  if (LHS.getOpcode() == ISD::LOAD) {
10391  LoadSDNode *LLD = cast<LoadSDNode>(LHS);
10392  LoadSDNode *RLD = cast<LoadSDNode>(RHS);
10393 
10394  // Token chains must be identical.
10395  if (LHS.getOperand(0) != RHS.getOperand(0) ||
10396  // Do not let this transformation reduce the number of volatile loads.
10397  LLD->isVolatile() || RLD->isVolatile() ||
10398  // If this is an EXTLOAD, the VT's must match.
10399  LLD->getMemoryVT() != RLD->getMemoryVT() ||
10400  // If this is an EXTLOAD, the kind of extension must match.
10401  (LLD->getExtensionType() != RLD->getExtensionType() &&
10402  // The only exception is if one of the extensions is anyext.
10403  LLD->getExtensionType() != ISD::EXTLOAD &&
10404  RLD->getExtensionType() != ISD::EXTLOAD) ||
10405  // FIXME: this discards src value information. This is
10406  // over-conservative. It would be beneficial to be able to remember
10407  // both potential memory locations. Since we are discarding
10408  // src value info, don't do the transformation if the memory
10409  // locations are not in the default address space.
10410  LLD->getPointerInfo().getAddrSpace() != 0 ||
10411  RLD->getPointerInfo().getAddrSpace() != 0 ||
10412  !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
10413  LLD->getBasePtr().getValueType()))
10414  return false;
10415 
10416  // Check that the select condition doesn't reach either load. If so,
10417  // folding this will induce a cycle into the DAG. If not, this is safe to
10418  // xform, so create a select of the addresses.
10419  SDValue Addr;
10420  if (TheSelect->getOpcode() == ISD::SELECT) {
10421  SDNode *CondNode = TheSelect->getOperand(0).getNode();
10422  if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
10423  (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
10424  return false;
10425  // The loads must not depend on one another.
10426  if (LLD->isPredecessorOf(RLD) ||
10427  RLD->isPredecessorOf(LLD))
10428  return false;
10429  Addr = DAG.getSelect(SDLoc(TheSelect),
10430  LLD->getBasePtr().getValueType(),
10431  TheSelect->getOperand(0), LLD->getBasePtr(),
10432  RLD->getBasePtr());
10433  } else { // Otherwise SELECT_CC
10434  SDNode *CondLHS = TheSelect->getOperand(0).getNode();
10435  SDNode *CondRHS = TheSelect->getOperand(1).getNode();
10436 
10437  if ((LLD->hasAnyUseOfValue(1) &&
10438  (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
10439  (RLD->hasAnyUseOfValue(1) &&
10440  (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
10441  return false;
10442 
10443  Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
10444  LLD->getBasePtr().getValueType(),
10445  TheSelect->getOperand(0),
10446  TheSelect->getOperand(1),
10447  LLD->getBasePtr(), RLD->getBasePtr(),
10448  TheSelect->getOperand(4));
10449  }
10450 
10451  SDValue Load;
10452  if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
10453  Load = DAG.getLoad(TheSelect->getValueType(0),
10454  SDLoc(TheSelect),
10455  // FIXME: Discards pointer and TBAA info.
10456  LLD->getChain(), Addr, MachinePointerInfo(),
10457  LLD->isVolatile(), LLD->isNonTemporal(),
10458  LLD->isInvariant(), LLD->getAlignment());
10459  } else {
10460  Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
10461  RLD->getExtensionType() : LLD->getExtensionType(),
10462  SDLoc(TheSelect),
10463  TheSelect->getValueType(0),
10464  // FIXME: Discards pointer and TBAA info.
10465  LLD->getChain(), Addr, MachinePointerInfo(),
10466  LLD->getMemoryVT(), LLD->isVolatile(),
10467  LLD->isNonTemporal(), LLD->getAlignment());
10468  }
10469 
10470  // Users of the select now use the result of the load.
10471  CombineTo(TheSelect, Load);
10472 
10473  // Users of the old loads now use the new load's chain. We know the
10474  // old-load value is dead now.
10475  CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
10476  CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
10477  return true;
10478  }
10479 
10480  return false;
10481 }
10482 
10483 /// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
10484 /// where 'cond' is the comparison specified by CC.
10485 SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
10486  SDValue N2, SDValue N3,
10487  ISD::CondCode CC, bool NotExtCompare) {
10488  // (x ? y : y) -> y.
10489  if (N2 == N3) return N2;
10490 
10491  EVT VT = N2.getValueType();
10495 
10496  // Determine if the condition we're dealing with is constant
10498  N0, N1, CC, DL, false);
10499  if (SCC.getNode()) AddToWorkList(SCC.getNode());
10500  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
10501 
10502  // fold select_cc true, x, y -> x
10503  if (SCCC && !SCCC->isNullValue())
10504  return N2;
10505  // fold select_cc false, x, y -> y
10506  if (SCCC && SCCC->isNullValue())
10507  return N3;
10508 
10509  // Check to see if we can simplify the select into an fabs node
10510  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
10511  // Allow either -0.0 or 0.0
10512  if (CFP->getValueAPF().isZero()) {
10513  // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
10514  if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
10515  N0 == N2 && N3.getOpcode() == ISD::FNEG &&
10516  N2 == N3.getOperand(0))
10517  return DAG.getNode(ISD::FABS, DL, VT, N0);
10518 
10519  // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
10520  if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
10521  N0 == N3 && N2.getOpcode() == ISD::FNEG &&
10522  N2.getOperand(0) == N3)
10523  return DAG.getNode(ISD::FABS, DL, VT, N3);
10524  }
10525  }
10526 
10527  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
10528  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
10529  // in it. This is a win when the constant is not otherwise available because
10530  // it replaces two constant pool loads with one. We only do this if the FP
10531  // type is known to be legal, because if it isn't, then we are before legalize
10532  // types an we want the other legalization to happen first (e.g. to avoid
10533  // messing with soft float) and if the ConstantFP is not legal, because if
10534  // it is legal, we may not need to store the FP constant in a constant pool.
10535  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
10536  if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
10537  if (TLI.isTypeLegal(N2.getValueType()) &&
10538  (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
10540  // If both constants have multiple uses, then we won't need to do an
10541  // extra load, they are likely around in registers for other users.
10542  (TV->hasOneUse() || FV->hasOneUse())) {
10543  Constant *Elts[] = {
10544  const_cast<ConstantFP*>(FV->getConstantFPValue()),
10545  const_cast<ConstantFP*>(TV->getConstantFPValue())
10546  };
10547  Type *FPTy = Elts[0]->getType();
10548  const DataLayout &TD = *TLI.getDataLayout();
10549 
10550  // Create a ConstantArray of the two constants.
10551  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
10552  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
10553  TD.getPrefTypeAlignment(FPTy));
10554  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
10555 
10556  // Get the offsets to the 0 and 1 element of the array so that we can
10557  // select between them.
10558  SDValue Zero = DAG.getIntPtrConstant(0);
10559  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
10560  SDValue One = DAG.getIntPtrConstant(EltSize);
10561 
10562  SDValue Cond = DAG.getSetCC(DL,
10564  N0, N1, CC);
10565  AddToWorkList(Cond.getNode());
10566  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
10567  Cond, One, Zero);
10568  AddToWorkList(CstOffset.getNode());
10569  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
10570  CstOffset);
10571  AddToWorkList(CPIdx.getNode());
10572  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
10574  false, false, Alignment);
10575 
10576  }
10577  }
10578 
10579  // Check to see if we can perform the "gzip trick", transforming
10580  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
10581  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
10582  (N1C->isNullValue() || // (a < 0) ? b : 0
10583  (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
10584  EVT XType = N0.getValueType();
10585  EVT AType = N2.getValueType();
10586  if (XType.bitsGE(AType)) {
10587  // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
10588  // single-bit constant.
10589  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
10590  unsigned ShCtV = N2C->getAPIntValue().logBase2();
10591  ShCtV = XType.getSizeInBits()-ShCtV-1;
10592  SDValue ShCt = DAG.getConstant(ShCtV,
10594  SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
10595  XType, N0, ShCt);
10596  AddToWorkList(Shift.getNode());
10597 
10598  if (XType.bitsGT(AType)) {
10599  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
10600  AddToWorkList(Shift.getNode());
10601  }
10602 
10603  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
10604  }
10605 
10606  SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
10607  XType, N0,
10608  DAG.getConstant(XType.getSizeInBits()-1,
10610  AddToWorkList(Shift.getNode());
10611 
10612  if (XType.bitsGT(AType)) {
10613  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
10614  AddToWorkList(Shift.getNode());
10615  }
10616 
10617  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
10618  }
10619  }
10620 
10621  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
10622  // where y is has a single bit set.
10623  // A plaintext description would be, we can turn the SELECT_CC into an AND
10624  // when the condition can be materialized as an all-ones register. Any
10625  // single bit-test can be materialized as an all-ones register with
10626  // shift-left and shift-right-arith.
10627  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
10628  N0->getValueType(0) == VT &&
10629  N1C && N1C->isNullValue() &&
10630  N2C && N2C->isNullValue()) {
10631  SDValue AndLHS = N0->getOperand(0);
10632  ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
10633  if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
10634  // Shift the tested bit over the sign bit.
10635  APInt AndMask = ConstAndRHS->getAPIntValue();
10636  SDValue ShlAmt =
10637  DAG.getConstant(AndMask.countLeadingZeros(),
10638  getShiftAmountTy(AndLHS.getValueType()));
10639  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
10640 
10641  // Now arithmetic right shift it all the way over, so the result is either
10642  // all-ones, or zero.
10643  SDValue ShrAmt =
10644  DAG.getConstant(AndMask.getBitWidth()-1,
10646  SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
10647 
10648  return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
10649  }
10650  }
10651 
10652  // fold select C, 16, 0 -> shl C, 4
10653  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
10656 
10657  // If the caller doesn't want us to simplify this into a zext of a compare,
10658  // don't do it.
10659  if (NotExtCompare && N2C->getAPIntValue() == 1)
10660  return SDValue();
10661 
10662  // Get a SetCC of the condition
10663  // NOTE: Don't create a SETCC if it's not legal on this target.
10664  if (!LegalOperations ||
10666  LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
10667  SDValue Temp, SCC;
10668  // cast from setcc result type to select result type
10669  if (LegalTypes) {
10670  SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
10671  N0, N1, CC);
10672  if (N2.getValueType().bitsLT(SCC.getValueType()))
10673  Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
10674  N2.getValueType());
10675  else
10676  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
10677  N2.getValueType(), SCC);
10678  } else {
10679  SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
10680  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
10681  N2.getValueType(), SCC);
10682  }
10683 
10684  AddToWorkList(SCC.getNode());
10685  AddToWorkList(Temp.getNode());
10686 
10687  if (N2C->getAPIntValue() == 1)
10688  return Temp;
10689 
10690  // shl setcc result by log2 n2c
10691  return DAG.getNode(
10692  ISD::SHL, DL, N2.getValueType(), Temp,
10693  DAG.getConstant(N2C->getAPIntValue().logBase2(),
10694  getShiftAmountTy(Temp.getValueType())));
10695  }
10696  }
10697 
10698  // Check to see if this is the equivalent of setcc
10699  // FIXME: Turn all of these into setcc if setcc if setcc is legal
10700  // otherwise, go ahead with the folds.
10701  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
10702  EVT XType = N0.getValueType();
10703  if (!LegalOperations ||
10705  SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
10706  if (Res.getValueType() != VT)
10707  Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
10708  return Res;
10709  }
10710 
10711  // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
10712  if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
10713  (!LegalOperations ||
10714  TLI.isOperationLegal(ISD::CTLZ, XType))) {
10715  SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
10716  return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
10717  DAG.getConstant(Log2_32(XType.getSizeInBits()),
10718  getShiftAmountTy(Ctlz.getValueType())));
10719  }
10720  // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
10721  if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
10722  SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0),
10723  XType, DAG.getConstant(0, XType), N0);
10724  SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType);
10725  return DAG.getNode(ISD::SRL, DL, XType,
10726  DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
10727  DAG.getConstant(XType.getSizeInBits()-1,
10728  getShiftAmountTy(XType)));
10729  }
10730  // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
10731  if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
10732  SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0,
10733  DAG.getConstant(XType.getSizeInBits()-1,
10735  return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
10736  }
10737  }
10738 
10739  // Check to see if this is an integer abs.
10740  // select_cc setg[te] X, 0, X, -X ->
10741  // select_cc setgt X, -1, X, -X ->
10742  // select_cc setl[te] X, 0, -X, X ->
10743  // select_cc setlt X, 1, -X, X ->
10744  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10745  if (N1C) {
10746  ConstantSDNode *SubC = NULL;
10747  if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10748  (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
10749  N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
10750  SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
10751  else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
10752  (N1C->isOne() && CC == ISD::SETLT)) &&
10753  N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
10754  SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
10755 
10756  EVT XType = N0.getValueType();
10757  if (SubC && SubC->isNullValue() && XType.isInteger()) {
10758  SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
10759  N0,
10760  DAG.getConstant(XType.getSizeInBits()-1,
10761  getShiftAmountTy(N0.getValueType())));
10762  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
10763  XType, N0, Shift);
10764  AddToWorkList(Shift.getNode());
10765  AddToWorkList(Add.getNode());
10766  return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
10767  }
10768  }
10769 
10770  return SDValue();
10771 }
10772 
10773 /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
10774 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
10775  SDValue N1, ISD::CondCode Cond,
10776  SDLoc DL, bool foldBooleans) {
10778  DagCombineInfo(DAG, Level, false, this);
10779  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
10780 }
10781 
10782 /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
10783 /// return a DAG expression to select that will generate the same value by
10784 /// multiplying by a magic number. See:
10785 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
10786 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
10787  std::vector<SDNode*> Built;
10788  SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
10789 
10790  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
10791  ii != ee; ++ii)
10792  AddToWorkList(*ii);
10793  return S;
10794 }
10795 
10796 /// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
10797 /// return a DAG expression to select that will generate the same value by
10798 /// multiplying by a magic number. See:
10799 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
10800 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
10801  std::vector<SDNode*> Built;
10802  SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
10803 
10804  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
10805  ii != ee; ++ii)
10806  AddToWorkList(*ii);
10807  return S;
10808 }
10809 
10810 /// FindBaseOffset - Return true if base is a frame index, which is known not
10811 // to alias with anything but itself. Provides base object and offset as
10812 // results.
10813 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
10814  const GlobalValue *&GV, const void *&CV) {
10815  // Assume it is a primitive operation.
10816  Base = Ptr; Offset = 0; GV = 0; CV = 0;
10817 
10818  // If it's an adding a simple constant then integrate the offset.
10819  if (Base.getOpcode() == ISD::ADD) {
10820  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
10821  Base = Base.getOperand(0);
10822  Offset += C->getZExtValue();
10823  }
10824  }
10825 
10826  // Return the underlying GlobalValue, and update the Offset. Return false
10827  // for GlobalAddressSDNode since the same GlobalAddress may be represented
10828  // by multiple nodes with different offsets.
10829  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
10830  GV = G->getGlobal();
10831  Offset += G->getOffset();
10832  return false;
10833  }
10834 
10835  // Return the underlying Constant value, and update the Offset. Return false
10836  // for ConstantSDNodes since the same constant pool entry may be represented
10837  // by multiple nodes with different offsets.
10838  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
10839  CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
10840  : (const void *)C->getConstVal();
10841  Offset += C->getOffset();
10842  return false;
10843  }
10844  // If it's any of the following then it can't alias with anything but itself.
10845  return isa<FrameIndexSDNode>(Base);
10846 }
10847 
10848 /// isAlias - Return true if there is any possibility that the two addresses
10849 /// overlap.
10850 bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
10851  const Value *SrcValue1, int SrcValueOffset1,
10852  unsigned SrcValueAlign1,
10853  const MDNode *TBAAInfo1,
10854  SDValue Ptr2, int64_t Size2, bool IsVolatile2,
10855  const Value *SrcValue2, int SrcValueOffset2,
10856  unsigned SrcValueAlign2,
10857  const MDNode *TBAAInfo2) const {
10858  // If they are the same then they must be aliases.
10859  if (Ptr1 == Ptr2) return true;
10860 
10861  // If they are both volatile then they cannot be reordered.
10862  if (IsVolatile1 && IsVolatile2) return true;
10863 
10864  // Gather base node and offset information.
10865  SDValue Base1, Base2;
10866  int64_t Offset1, Offset2;
10867  const GlobalValue *GV1, *GV2;
10868  const void *CV1, *CV2;
10869  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
10870  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
10871 
10872  // If they have a same base address then check to see if they overlap.
10873  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
10874  return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
10875 
10876  // It is possible for different frame indices to alias each other, mostly
10877  // when tail call optimization reuses return address slots for arguments.
10878  // To catch this case, look up the actual index of frame indices to compute
10879  // the real alias relationship.
10880  if (isFrameIndex1 && isFrameIndex2) {
10881  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
10882  Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
10883  Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
10884  return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
10885  }
10886 
10887  // Otherwise, if we know what the bases are, and they aren't identical, then
10888  // we know they cannot alias.
10889  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
10890  return false;
10891 
10892  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
10893  // compared to the size and offset of the access, we may be able to prove they
10894  // do not alias. This check is conservative for now to catch cases created by
10895  // splitting vector types.
10896  if ((SrcValueAlign1 == SrcValueAlign2) &&
10897  (SrcValueOffset1 != SrcValueOffset2) &&
10898  (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
10899  int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
10900  int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
10901 
10902  // There is no overlap between these relatively aligned accesses of similar
10903  // size, return no alias.
10904  if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
10905  return false;
10906  }
10907 
10908  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
10910  if (UseAA && SrcValue1 && SrcValue2) {
10911  // Use alias analysis information.
10912  int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
10913  int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
10914  int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
10915  AliasAnalysis::AliasResult AAResult =
10916  AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
10917  AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
10918  if (AAResult == AliasAnalysis::NoAlias)
10919  return false;
10920  }
10921 
10922  // Otherwise we have to assume they alias.
10923  return true;
10924 }
10925 
10926 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
10927  SDValue Ptr0, Ptr1;
10928  int64_t Size0, Size1;
10929  bool IsVolatile0, IsVolatile1;
10930  const Value *SrcValue0, *SrcValue1;
10931  int SrcValueOffset0, SrcValueOffset1;
10932  unsigned SrcValueAlign0, SrcValueAlign1;
10933  const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
10934  FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
10935  SrcValueAlign0, SrcTBAAInfo0);
10936  FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
10937  SrcValueAlign1, SrcTBAAInfo1);
10938  return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
10939  SrcValueAlign0, SrcTBAAInfo0,
10940  Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
10941  SrcValueAlign1, SrcTBAAInfo1);
10942 }
10943 
10944 /// FindAliasInfo - Extracts the relevant alias information from the memory
10945 /// node. Returns true if the operand was a nonvolatile load.
10946 bool DAGCombiner::FindAliasInfo(SDNode *N,
10947  SDValue &Ptr, int64_t &Size, bool &IsVolatile,
10948  const Value *&SrcValue,
10949  int &SrcValueOffset,
10950  unsigned &SrcValueAlign,
10951  const MDNode *&TBAAInfo) const {
10952  LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
10953 
10954  Ptr = LS->getBasePtr();
10955  Size = LS->getMemoryVT().getSizeInBits() >> 3;
10956  IsVolatile = LS->isVolatile();
10957  SrcValue = LS->getSrcValue();
10958  SrcValueOffset = LS->getSrcValueOffset();
10959  SrcValueAlign = LS->getOriginalAlignment();
10960  TBAAInfo = LS->getTBAAInfo();
10961  return isa<LoadSDNode>(LS) && !IsVolatile;
10962 }
10963 
10964 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
10965 /// looking for aliasing nodes and adding them to the Aliases vector.
10966 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
10967  SmallVectorImpl<SDValue> &Aliases) {
10968  SmallVector<SDValue, 8> Chains; // List of chains to visit.
10969  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
10970 
10971  // Get alias information for node.
10972  SDValue Ptr;
10973  int64_t Size;
10974  bool IsVolatile;
10975  const Value *SrcValue;
10976  int SrcValueOffset;
10977  unsigned SrcValueAlign;
10978  const MDNode *SrcTBAAInfo;
10979  bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
10980  SrcValueOffset, SrcValueAlign, SrcTBAAInfo);
10981 
10982  // Starting off.
10983  Chains.push_back(OriginalChain);
10984  unsigned Depth = 0;
10985 
10986  // Look at each chain and determine if it is an alias. If so, add it to the
10987  // aliases list. If not, then continue up the chain looking for the next
10988  // candidate.
10989  while (!Chains.empty()) {
10990  SDValue Chain = Chains.back();
10991  Chains.pop_back();
10992 
10993  // For TokenFactor nodes, look at each operand and only continue up the
10994  // chain until we find two aliases. If we've seen two aliases, assume we'll
10995  // find more and revert to original chain since the xform is unlikely to be
10996  // profitable.
10997  //
10998  // FIXME: The depth check could be made to return the last non-aliasing
10999  // chain we found before we hit a tokenfactor rather than the original
11000  // chain.
11001  if (Depth > 6 || Aliases.size() == 2) {
11002  Aliases.clear();
11003  Aliases.push_back(OriginalChain);
11004  break;
11005  }
11006 
11007  // Don't bother if we've been before.
11008  if (!Visited.insert(Chain.getNode()))
11009  continue;
11010 
11011  switch (Chain.getOpcode()) {
11012  case ISD::EntryToken:
11013  // Entry token is ideal chain operand, but handled in FindBetterChain.
11014  break;
11015 
11016  case ISD::LOAD:
11017  case ISD::STORE: {
11018  // Get alias information for Chain.
11019  SDValue OpPtr;
11020  int64_t OpSize;
11021  bool OpIsVolatile;
11022  const Value *OpSrcValue;
11023  int OpSrcValueOffset;
11024  unsigned OpSrcValueAlign;
11025  const MDNode *OpSrcTBAAInfo;
11026  bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
11027  OpIsVolatile, OpSrcValue, OpSrcValueOffset,
11028  OpSrcValueAlign,
11029  OpSrcTBAAInfo);
11030 
11031  // If chain is alias then stop here.
11032  if (!(IsLoad && IsOpLoad) &&
11033  isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
11034  SrcValueAlign, SrcTBAAInfo,
11035  OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
11036  OpSrcValueAlign, OpSrcTBAAInfo)) {
11037  Aliases.push_back(Chain);
11038  } else {
11039  // Look further up the chain.
11040  Chains.push_back(Chain.getOperand(0));
11041  ++Depth;
11042  }
11043  break;
11044  }
11045 
11046  case ISD::TokenFactor:
11047  // We have to check each of the operands of the token factor for "small"
11048  // token factors, so we queue them up. Adding the operands to the queue
11049  // (stack) in reverse order maintains the original order and increases the
11050  // likelihood that getNode will find a matching token factor (CSE.)
11051  if (Chain.getNumOperands() > 16) {
11052  Aliases.push_back(Chain);
11053  break;
11054  }
11055  for (unsigned n = Chain.getNumOperands(); n;)
11056  Chains.push_back(Chain.getOperand(--n));
11057  ++Depth;
11058  break;
11059 
11060  default:
11061  // For all other instructions we will just have to take what we can get.
11062  Aliases.push_back(Chain);
11063  break;
11064  }
11065  }
11066 }
11067 
11068 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
11069 /// for a better chain (aliasing node.)
11070 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
11071  SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
11072 
11073  // Accumulate all the aliases to this node.
11074  GatherAllAliases(N, OldChain, Aliases);
11075 
11076  // If no operands then chain to entry token.
11077  if (Aliases.size() == 0)
11078  return DAG.getEntryNode();
11079 
11080  // If a single operand then chain to it. We don't need to revisit it.
11081  if (Aliases.size() == 1)
11082  return Aliases[0];
11083 
11084  // Construct a custom tailored token factor.
11085  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
11086  &Aliases[0], Aliases.size());
11087 }
11088 
11089 // SelectionDAG::Combine - This is the entry point for the file.
11090 //
11092  CodeGenOpt::Level OptLevel) {
11093  /// run - This is the main entry point to this class.
11094  ///
11095  DAGCombiner(*this, AA, OptLevel).Run(Level);
11096 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getValueSizeInBits(unsigned ResNo) const
bool use_empty() const
opStatus divide(const APFloat &, roundingMode)
Definition: APFloat.cpp:1675
static MVT getIntegerVT(unsigned BitWidth)
Definition: ValueTypes.h:481
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:544
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false)
SDValue getValue(unsigned R) const
static APInt getSignBit(unsigned BitWidth)
Get the SignBit for a specific bit width.
Definition: APInt.h:443
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
bool isExactlyValue(double V) const
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory. This means that there is no h...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:450
LLVMContext * getContext() const
Definition: SelectionDAG.h:285
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1306
void dump() const
dump - Dump this node, for debugging.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
bool isNON_TRUNCStore(const SDNode *N)
Sorts LoadedSlice according to their offset.
bool hasOneUse() const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
bool hasOneUse() const
const TargetMachine & getTargetMachine() const
SDVTList getVTList() const
bool equalBaseIndex(const BaseIndexOffset &Other)
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask)
MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:266
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:281
bool isExtended() const
Definition: ValueTypes.h:646
unsigned getPrefTypeAlignment(Type *Ty) const
Definition: DataLayout.cpp:600
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger)
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:528
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
bool insert(PtrType Ptr)
Definition: SmallPtrSet.h:253
unsigned getOpcode() const
void operator>(const Optional< T > &X, const Optional< U > &Y)
Poison comparison between two Optional objects. Clients need to explicitly compare the underlying va...
void operator<(const Optional< T > &X, const Optional< U > &Y)
Poison comparison between two Optional objects. Clients need to explicitly compare the underlying va...
virtual bool isFAbsFree(EVT VT) const
Type * getTypeForEVT(LLVMContext &Context) const
Definition: ValueTypes.cpp:180
unsigned getSizeInBits() const
Definition: ValueTypes.h:359
bool isUnindexed() const
isUnindexed - Return true if this is NOT a pre/post inc/dec load/store.
bool isMask(unsigned numBits, const APInt &APIVal)
Definition: APInt.h:1717
unsigned getNumOperands() const
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1200
const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, const TargetRegisterClass *B) const
unsigned getNumOperands() const
unsigned getValueSizeInBits() const
MDNode - a tuple of other values.
Definition: Metadata.h:69
const SDValue & getOperand(unsigned Num) const
static MachinePointerInfo getConstantPool()
static bool isCommutativeBinOp(unsigned Opcode)
Definition: SelectionDAG.h:988
void ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV)
FindBaseOffset - Return true if base is a frame index, which is known not to alias with anything but itself.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const
Return true if the attribute exists at the given index.
Definition: Attributes.cpp:818
const SDValue & getBasePtr() const
APInt LLVM_ATTRIBUTE_UNUSED_RESULT zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
static SDNode * ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
virtual bool isFPImmLegal(const APFloat &, EVT) const
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:45
unsigned getResNo() const
get the index which selects a specific result in the SDNode
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:735
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, APInt &KnownOne, TargetLoweringOpt &TLO, unsigned Depth=0) const
int64_t getSrcValueOffset() const
bool isAllOnesValue() const
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
EVT getValueType(Type *Ty, bool AllowUnknown=false) const
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:322
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:661
EVT getShiftAmountTy(EVT LHSTy) const
bool isSEXTLoad(const SDNode *N)
bool isLittleEndian() const
unsigned CountTrailingOnes_64(uint64_t Value)
Definition: MathExtras.h:410
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo=0)
bool isRound() const
isRound - Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:711
virtual bool isLegalAddImmediate(int64_t) const
bool isNormalStore(const SDNode *N)
#define llvm_unreachable(msg)
bool isBuildVectorAllZeros(const SDNode *N)
EVT getValueType(unsigned ResNo) const
Definition: Use.h:60
bool HonorSignDependentRoundingFPMath() const
APInt LLVM_ATTRIBUTE_UNUSED_RESULT lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.cpp:1127
virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const
unsigned logBase2(const APInt &APIVal)
Returns the floor log base 2 of the specified APInt value.
Definition: APInt.h:1732
bool isZEXTLoad(const SDNode *N)
MachinePointerInfo getWithOffset(int64_t O) const
SimpleValueType SimpleTy
Definition: ValueTypes.h:161
EVT getScalarType() const
Definition: ValueTypes.h:756
Abstract Stack Frame Information.
bool bitsGE(EVT VT) const
bitsGE - Return true if this has no less bits than VT.
Definition: ValueTypes.h:729
int getMaskElt(unsigned Idx) const
virtual MVT getPointerTy(uint32_t=0) const
enable_if_c< std::numeric_limits< T >::is_integer &&!std::numeric_limits< T >::is_signed, std::size_t >::type countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:120
#define false
Definition: ConvertUTF.c:64
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, SDLoc dl) const
#define G(x, y, z)
Definition: MD5.cpp:52
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:656
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:404
SDValue getConstantFP(double Val, EVT VT, bool isTarget=false)
APInt lshr(const APInt &LHS, unsigned shiftAmt)
Logical right-shift function.
Definition: APInt.h:1790
EVT getVectorElementType() const
Definition: ValueTypes.h:762
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
STATISTIC(NodesCombined,"Number of dag nodes combined")
unsigned getNumValues() const
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:56
enable_if_c< std::numeric_limits< T >::is_integer &&!std::numeric_limits< T >::is_signed, std::size_t >::type countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:49
SDValue CombineTo(SDNode *N, const std::vector< SDValue > &To, bool AddTo=true)
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:176
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1276
const SDValue & getBasePtr() const
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:585
static bool isConstantSplatVector(SDNode *N, APInt &SplatValue)
Sorts store nodes in a link according to their offset from a shared base pointer.
const APInt & getAPIntValue() const
bool isLoadExtLegal(unsigned ExtType, EVT VT) const
Return true if the specified load with extension is legal on this target.
APInt LLVM_ATTRIBUTE_UNUSED_RESULT shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:856
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
EVT getMemoryVT() const
getMemoryVT - Return the type of the in-memory value.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, std::vector< SDNode * > *Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
bool isSignedIntSetCC(CondCode Code)
Definition: ISDOpcodes.h:752
virtual MVT getVectorIdxTy() const
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, APInt &KnownZero)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:154
static bool isBSwapHWordElement(SDValue N, SmallVectorImpl< SDNode * > &Parts)
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:510
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false)
SDNode * getNode() const
get the SDNode which holds the desired result
void operator<=(const Optional< T > &X, const Optional< U > &Y)
Poison comparison between two Optional objects. Clients need to explicitly compare the underlying va...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
A self-contained host- and target-independent arbitrary-precision floating-point software implementat...
Definition: APFloat.h:122
unsigned getStoreSize() const
Definition: ValueTypes.h:787
bool isTypeLegal(EVT VT) const
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:314
bool isNormalLoad(const SDNode *N)
unsigned getStoreSizeInBits() const
Definition: ValueTypes.h:793
const SDValue & getBasePtr() const
bool isZero() const
isZero - Return true if the value is positive or negative zero.
APInt LLVM_ATTRIBUTE_UNUSED_RESULT trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:919
const SDValue & getOperand(unsigned i) const
Simple binary floating point operators.
Definition: ISDOpcodes.h:222
bool isNonTemporal() const
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
void Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel)
LLVM Constant Representation.
Definition: Constant.h:41
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1318
static BaseIndexOffset match(SDValue Ptr)
Parses tree in Ptr for base, index, offset addresses.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:745
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
CombineLevel
Definition: DAGCombine.h:16
APInt LLVM_ATTRIBUTE_UNUSED_RESULT sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:942
virtual bool isTruncateFree(Type *, Type *) const
unsigned getOriginalAlignment() const
Returns alignment and volatility of the memory access.
const DataLayout * getDataLayout() const
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1252
unsigned getOpcode() const
virtual bool isNarrowingProfitable(EVT, EVT) const
void changeSign()
Definition: APFloat.cpp:1589
CondCode getSetCCSwappedOperands(CondCode Operation)
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1394
use_iterator use_begin() const
bool isVolatile() const
const SDValue & getValue() const
Location - A description of a memory location.
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:312
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:411
void append(in_iter in_start, in_iter in_end)
Definition: SmallVector.h:445
uint64_t NextPowerOf2(uint64_t A)
Definition: MathExtras.h:546
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes)
const APFloat & getValueAPF() const
bool bitsEq(EVT VT) const
bitsEq - Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:717
const ConstantFP * getConstantFPValue() const
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0...
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target pairing capabilities and the layout of the slices...
ISD::MemIndexedMode getAddressingMode() const
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
Definition: ValueTypes.h:616
uint64_t getConstantOperandVal(unsigned Num) const
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:390
static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth=0)
bool isEXTLoad(const SDNode *N)
const MachinePointerInfo & getPointerInfo() const
int64_t getObjectOffset(int ObjectIdx) const
bool isUNINDEXEDLoad(const SDNode *N)
const SDValue & getOffset() const
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:723
ArrayRef< int > getMask() const
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.cpp:736
unsigned getABITypeAlignment(Type *Ty) const
Definition: DataLayout.cpp:582
bool isBuildVectorAllOnes(const SDNode *N)
Node predicates.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI)
const STC & getSubtarget() const
bool isNegative() const
Definition: APFloat.h:361
uint64_t getTypeAllocSize(Type *Ty) const
Definition: DataLayout.h:326
bool isInvariant() const
BooleanContent getBooleanContents(bool isVec) const
virtual bool isVectorClearMaskLegal(const SmallVectorImpl< int > &, EVT) const
unsigned logBase2() const
Definition: APInt.h:1500
bool isPredecessorOf(const SDNode *N) const
Type * getType() const
Definition: Value.h:111
bool allOperandsUndef(const SDNode *N)
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:309
MachineMemOperand * getMemOperand() const
void operator>=(const Optional< T > &X, const Optional< U > &Y)
Poison comparison between two Optional objects. Clients need to explicitly compare the underlying va...
unsigned getAddrSpace() const
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
static std::pair< SDValue, SDValue > SplitVSETCC(const SDNode *N, SelectionDAG &DAG)
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
Definition: Debug.cpp:101
unsigned Log2_32(uint32_t Value)
Definition: MathExtras.h:443
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:591
const MDNode * getTBAAInfo() const
Returns the TBAAInfo that describes the dereference.
ISD::LoadExtType getExtensionType() const
Class for arbitrary precision integers.
Definition: APInt.h:75
static bool isOneUseSetCC(SDValue N)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
int64_t getSExtValue() const
op_iterator op_begin() const
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, unsigned Depth=0)
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:357
AddrMode
ARM Addressing Modes.
Definition: ARMBaseInfo.h:234
APInt bitcastToAPInt() const
Definition: APFloat.cpp:3050
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:360
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(DefaultAlign), cl::values(clEnumValN(DefaultAlign,"arm-default-align","Generate unaligned accesses only on hardware/OS ""combinations that are known to support them"), clEnumValN(StrictAlign,"arm-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"arm-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger)
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS)
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1840
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:495
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1686
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:340
uint64_t MinAlign(uint64_t A, uint64_t B)
Definition: MathExtras.h:535
BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, bool IsIndexSignExt)
bool isScalarToVector(const SDNode *N)
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:295
bool hasAnyUseOfValue(unsigned Value) const
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:779
static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
bool isIndexed() const
isIndexed - Return true if this is a pre/post inc/dec load/store.
op_iterator op_end() const
static ArrayType * get(Type *ElementType, uint64_t NumElements)
Definition: Type.cpp:679
Same for multiplication.
Definition: ISDOpcodes.h:219
virtual const TargetRegisterInfo * getRegisterInfo() const
static SDValue getInputChainForNode(SDNode *N)
bool isNON_EXTLoad(const SDNode *N)
const Value * getSrcValue() const
Returns the SrcValue and offset that describes the location of the access.
bool operator()(MemOpLink LHS, MemOpLink RHS)
EVT getValueType() const
bool isByteSized() const
isByteSized - Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:706
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:651
bool hasPredecessorHelper(const SDNode *N, SmallPtrSet< const SDNode *, 32 > &Visited, SmallVectorImpl< const SDNode * > &Worklist) const
bool isSimple() const
Definition: ValueTypes.h:640
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
LLVM Value Representation.
Definition: Value.h:66
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice. Currently, it is considered profitable if there ...
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition: APFloat.h:376
bool isTruncatingStore() const
Disable implicit floating point insts.
Definition: Attributes.h:84
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, std::vector< SDNode * > *Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:282
#define DEBUG(X)
Definition: Debug.h:97
bool isInt(int64_t x)
isInt - Checks if an integer fits into the given bit width.
Definition: MathExtras.h:263
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1340
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const
bool isPowerOf2_32(uint32_t Value)
Definition: MathExtras.h:354
APInt LLVM_ATTRIBUTE_UNUSED_RESULT zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:983
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1684
static APInt getNullValue(unsigned numBits)
Get the '0' value.
Definition: APInt.h:457
static SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG)
SDNode * getUser()
getUser - This returns the SDNode that contains this Use.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:363
bool isUIntN(unsigned N, uint64_t x)
Definition: MathExtras.h:315
unsigned getAlignment() const
MVT getSimpleValueType(unsigned ResNo) const
tier< T1, T2 > tie(T1 &f, T2 &s)
Definition: STLExtras.h:216
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Definition: ValueTypes.h:607
virtual bool isFNegFree(EVT VT) const
const fltSemantics & getSemantics() const
Definition: APFloat.h:397
static RegisterPass< NVPTXAllocaHoisting > X("alloca-hoisting","Hoisting alloca instructions in non-entry ""blocks to the entry block")
EVT changeVectorElementTypeToInteger() const
Definition: ValueTypes.h:626
INITIALIZE_PASS(GlobalMerge,"global-merge","Global Merge", false, false) bool GlobalMerge const DataLayout * TD
unsigned getResNo() const
getResNo - Convenience function for get().getResNo().
MVT getSimpleVT() const
Definition: ValueTypes.h:749
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
uint64_t getBaseAlignment() const
ISD::CondCode get() const
uint64_t getZExtValue() const
unsigned getVectorNumElements() const
Definition: ValueTypes.h:771
Function must be optimized for size first.
Definition: Attributes.h:77