[SDAG] Update generic code to conservatively check for isAtomic in addition to isVolatile

This is the first sweep of generic code to add isAtomic bailouts where appropriate. The intention here is to have the switch from AtomicSDNode to LoadSDNode/StoreSDNode be close to NFC; that is, I'm not looking to allow additional optimizations at this time. That will come later.  See D66309 for context.

Differential Revision: https://reviews.llvm.org/D66318

llvm-svn: 371786
Philip Reames 2019-09-12 22:49:17 +00:00
parent 00330f5bfd
commit ba1f39ccae
6 changed files with 122 additions and 111 deletions
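
For reference, a minimal standalone sketch of the predicate these bailouts now use. This is illustrative only and does not include LLVM headers; MemAccess below is a hypothetical stand-in for MemSDNode, under the assumption that MemSDNode::isSimple() is equivalent to !isVolatile() && !isAtomic() at this revision:

  // Standalone model of the MemSDNode flags involved in this patch.
  // MemAccess is a hypothetical stand-in; only the relationship between
  // the flags is intended to mirror the real node classes.
  #include <cassert>

  struct MemAccess {
    bool Volatile = false;
    bool Atomic = false;
    bool isVolatile() const { return Volatile; }
    bool isAtomic() const { return Atomic; }
    // Simple == neither volatile nor atomic (assumed to match
    // MemSDNode::isSimple()).
    bool isSimple() const { return !isVolatile() && !isAtomic(); }
  };

  int main() {
    MemAccess Plain;                                          // ordinary load/store
    MemAccess Unordered{/*Volatile=*/false, /*Atomic=*/true}; // unordered atomic
    MemAccess Volat{/*Volatile=*/true, /*Atomic=*/false};     // volatile access

    // Old guards of the form !isVolatile() admit the unordered atomic case;
    // the new isSimple() guards conservatively reject it as well.
    assert(Plain.isSimple());
    assert(!Unordered.isVolatile() && !Unordered.isSimple());
    assert(!Volat.isSimple());
    return 0;
  }

The switch from the old !isVolatile() guards to isSimple() is what makes several CHECK-NOX/CHECK-EX pairs in the atomic test diff below collapse into a single, more conservative CHECK pattern.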

@ -1197,13 +1197,16 @@ def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
let ScalarMemoryVT = i16;
}
// TODO: These need to be renamed to simple_store/simple_load and then split
// into volatile/atomic/ordered flavors so that the respective transforms
// can pick the right combination.
def nonvolatile_load : PatFrag<(ops node:$ptr),
(load node:$ptr), [{
return !cast<LoadSDNode>(N)->isVolatile();
return cast<LoadSDNode>(N)->isSimple();
}]>;
def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return !cast<StoreSDNode>(N)->isVolatile();
return cast<StoreSDNode>(N)->isSimple();
}]>;
// nontemporal store fragments.

@ -4898,8 +4898,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
return true;
}
// Do not change the width of a volatile load.
if (LoadN->isVolatile())
// Do not change the width of a volatile or atomic load.
if (!LoadN->isSimple())
return false;
// Do not generate loads of non-round integer types since these can
@ -4931,8 +4931,8 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
if (!MemVT.isRound())
return false;
// Don't change the width of a volatile load.
if (LDST->isVolatile())
// Don't change the width of a volatile or atomic load.
if (!LDST->isSimple())
return false;
// Verify that we are actually reducing a load width here.
@ -5519,7 +5519,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
unsigned MemBitSize = MemVT.getScalarSizeInBits();
APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
if (DAG.MaskedValueIsZero(N1, ExtBits) &&
((!LegalOperations && !LN0->isVolatile()) ||
((!LegalOperations && LN0->isSimple()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
@ -6613,7 +6613,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
Depth + 1);
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
if (L->isVolatile() || L->isIndexed())
if (!L->isSimple() || L->isIndexed())
return None;
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
@ -6702,8 +6702,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
SDValue Chain;
SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
// TODO: Allow unordered atomics when wider type is legal (see D66309)
if (Store->getMemoryVT() != MVT::i8 ||
Store->isVolatile() || Store->isIndexed())
!Store->isSimple() || Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
@ -6914,7 +6915,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
LoadSDNode *L = P->Load;
assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
!L->isIndexed() &&
"Must be enforced by calculateByteProvider");
assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
@ -9244,8 +9246,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
!N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
!DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
!N0.hasOneUse() || !LN0->isSimple() ||
!DstVT.isVector() || !DstVT.isPow2VectorType() ||
!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
@ -9446,7 +9449,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
if ((LegalOperations || !LN0->isSimple() ||
VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return SDValue();
@ -9471,7 +9475,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
if (!ISD::isNON_EXTLoad(N0.getNode()) ||
!ISD::isUNINDEXEDLoad(N0.getNode()) ||
((LegalOperations || VT.isVector() ||
cast<LoadSDNode>(N0)->isVolatile()) &&
!cast<LoadSDNode>(N0)->isSimple()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
return {};
@ -10547,7 +10551,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@ -10564,7 +10568,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@ -10791,7 +10795,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (!LN0->isVolatile() &&
if (LN0->isSimple() &&
LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
@ -11085,7 +11089,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@ -14013,11 +14017,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (OptLevel == CodeGenOpt::None || LD->isVolatile())
if (OptLevel == CodeGenOpt::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
if (!ST || ST->isVolatile())
// TODO: Relax this restriction for unordered atomics (see D66309)
if (!ST || !ST->isSimple())
return SDValue();
EVT LDType = LD->getValueType(0);
@ -14116,7 +14121,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// If load is not volatile and there are no uses of the loaded value (and
// the updated indexed value in case of indexed loads), change uses of the
// chain value into uses of the chain input (i.e. delete the dead load).
if (!LD->isVolatile()) {
// TODO: Allow this for unordered atomics (see D66309)
if (LD->isSimple()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
if (!N->hasAnyUseOfValue(0)) {
@ -14687,7 +14693,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
return false;
LoadSDNode *LD = cast<LoadSDNode>(N);
if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
!LD->getValueType(0).isInteger())
return false;
@ -14918,7 +14924,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
if (ST->isVolatile())
if (!ST->isSimple())
return SDValue();
SDValue Chain = ST->getChain();
@ -15374,14 +15380,16 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
// The memory operands must not be volatile/indexed.
if (Ld->isVolatile() || Ld->isIndexed())
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!Ld->isSimple() || Ld->isIndexed())
return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
// The memory operands must not be volatile/indexed.
if (Other->isVolatile() || Other->isIndexed())
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
@ -15401,8 +15409,10 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
// The memory operands must not be volatile/indexed.
if (OtherLd->isVolatile() || OtherLd->isIndexed())
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
if (!OtherLd->isSimple() ||
OtherLd->isIndexed())
return false;
// Don't mix temporal loads with non-temporal loads.
if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
@ -16145,7 +16155,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
case MVT::ppcf128:
return SDValue();
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
;
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
@ -16157,7 +16167,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
return SDValue();
case MVT::f64:
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
!ST->isVolatile()) ||
ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
;
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@ -16166,7 +16176,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
Ptr, ST->getMemOperand());
}
if (!ST->isVolatile() &&
if (ST->isSimple() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
@ -16213,7 +16223,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
if (((!LegalOperations && !ST->isVolatile()) ||
// TODO: May be able to relax for unordered atomics (see D66309)
if (((!LegalOperations && ST->isSimple()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
@ -16294,9 +16305,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If this is a load followed by a store to the same location, then the store
// is dead/noop.
// TODO: Can relax for unordered atomics (see D66309)
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && !ST->isVolatile() &&
ST->isUnindexed() && ST->isSimple() &&
// There can't be any side effects between the load and store, such as
// a call or store.
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
@ -16305,9 +16317,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
// TODO: Can relax for unordered atomics (see D66309)
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
!ST1->isVolatile()) {
if (ST->isUnindexed() && ST->isSimple() &&
ST1->isUnindexed() && ST1->isSimple()) {
if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
ST->getMemoryVT() == ST1->getMemoryVT()) {
// If this is a store followed by a store with the same value to the
@ -16436,7 +16449,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
break;
case ISD::STORE: {
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
if (ST->isVolatile() || ST->isIndexed())
// TODO: Can relax for unordered atomics (see D66309)
if (!ST->isSimple() || ST->isIndexed())
continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
@ -16745,7 +16759,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue EltNo,
LoadSDNode *OriginalLoad) {
assert(!OriginalLoad->isVolatile());
assert(OriginalLoad->isSimple());
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
@ -17053,7 +17067,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
ISD::isNormalLoad(VecOp.getNode()) &&
!Index->hasPredecessor(VecOp.getNode())) {
auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
if (VecLoad && !VecLoad->isVolatile())
if (VecLoad && VecLoad->isSimple())
return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
@ -17112,7 +17126,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Make sure we found a non-volatile load and the extractelement is
// the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
return SDValue();
// If Idx was -1 above, Elt is going to be -1, so just return undef.
@ -18258,7 +18272,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
!ExtIdx)
return SDValue();
// Allow targets to opt-out.
@ -19831,7 +19846,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// Token chains must be identical.
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
LLD->isVolatile() || RLD->isVolatile() ||
// Be conservative for atomics for the moment
// TODO: This does appear to be legal for unordered atomics (see D66309)
!LLD->isSimple() || !RLD->isSimple() ||
// FIXME: If either is a pre/post inc/dec load,
// we'd need to split out the address adjustment.
LLD->isIndexed() || RLD->isIndexed() ||
@ -20533,6 +20550,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
Optional<int64_t> NumBytes;
@ -20548,18 +20566,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
Offset /*base offset*/,
Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
return {false /*isVolatile*/, LN->getOperand(1),
return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
: Optional<int64_t>(),
(MachineMemOperand *)nullptr};
// Default.
return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
(int64_t)0 /*offset*/,
Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
};
@ -20575,6 +20595,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
if (MUC0.IsVolatile && MUC1.IsVolatile)
return true;
// Be conservative about atomics for the moment
// TODO: This is way overconservative for unordered atomics (see D66309)
if (MUC0.IsAtomic && MUC1.IsAtomic)
return true;
if (MUC0.MMO && MUC1.MMO) {
if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
@ -20656,7 +20681,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
// TODO: relax aliasing for unordered atomics (see D66309)
const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
// Starting off.
Chains.push_back(OriginalChain);
@ -20672,8 +20698,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LOAD:
case ISD::STORE: {
// Get alias information for C.
// TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
!cast<LSBaseSDNode>(C.getNode())->isVolatile();
cast<LSBaseSDNode>(C.getNode())->isSimple();
if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
@ -20828,7 +20855,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())
break;
if (Chain->isVolatile() || Chain->isIndexed())
// TODO: Relax for unordered atomics (see D66309)
if (!Chain->isSimple() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.

@ -4772,7 +4772,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads.
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);

@ -8973,7 +8973,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
// Loads don't have side effects, look through them.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
if (!Ld->isVolatile())
if (Ld->isUnordered())
return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
}
return false;
@ -9211,6 +9211,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
int Dist) const {
if (LD->isVolatile() || Base->isVolatile())
return false;
// TODO: probably too restrictive for atomics, revisit
if (!LD->isSimple())
return false;
if (LD->isIndexed() || Base->isIndexed())
return false;
if (LD->getChain() != Base->getChain())

@ -3224,7 +3224,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
if (!Lod->isVolatile() && Lod->isUnindexed()) {
if (Lod->isSimple() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to

@ -604,16 +604,11 @@ define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) {
; Legal if wider type is also atomic (TODO)
define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-NOX-LABEL: widen_zero_init:
; CHECK-NOX: # %bb.0:
; CHECK-NOX-NEXT: movl $0, (%rdi)
; CHECK-NOX-NEXT: movl $0, 4(%rdi)
; CHECK-NOX-NEXT: retq
;
; CHECK-EX-LABEL: widen_zero_init:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movq $0, (%rdi)
; CHECK-EX-NEXT: retq
; CHECK-LABEL: widen_zero_init:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: retq
%p1 = getelementptr i32, i32* %p0, i64 1
store atomic i32 0, i32* %p0 unordered, align 8
store atomic i32 0, i32* %p1 unordered, align 4
@ -622,16 +617,11 @@ define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) {
; Not legal to widen due to alignment restriction
define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) {
; CHECK-NOX-LABEL: widen_zero_init_unaligned:
; CHECK-NOX: # %bb.0:
; CHECK-NOX-NEXT: movl $0, (%rdi)
; CHECK-NOX-NEXT: movl $0, 4(%rdi)
; CHECK-NOX-NEXT: retq
;
; CHECK-EX-LABEL: widen_zero_init_unaligned:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movq $0, (%rdi)
; CHECK-EX-NEXT: retq
; CHECK-LABEL: widen_zero_init_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: retq
%p1 = getelementptr i32, i32* %p0, i64 1
store atomic i32 0, i32* %p0 unordered, align 4
store atomic i32 0, i32* %p1 unordered, align 4
@ -1449,7 +1439,7 @@ define i64 @load_fold_shl3(i64* %p1, i64* %p2) {
;
; CHECK-EX-LABEL: load_fold_shl3:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movb (%rsi), %al
; CHECK-EX-NEXT: movq (%rsi), %rax
; CHECK-EX-NEXT: shlxq %rax, (%rdi), %rax
; CHECK-EX-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
@ -1510,7 +1500,7 @@ define i64 @load_fold_lshr3(i64* %p1, i64* %p2) {
;
; CHECK-EX-LABEL: load_fold_lshr3:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movb (%rsi), %al
; CHECK-EX-NEXT: movq (%rsi), %rax
; CHECK-EX-NEXT: shrxq %rax, (%rdi), %rax
; CHECK-EX-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
@ -1571,7 +1561,7 @@ define i64 @load_fold_ashr3(i64* %p1, i64* %p2) {
;
; CHECK-EX-LABEL: load_fold_ashr3:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movb (%rsi), %al
; CHECK-EX-NEXT: movq (%rsi), %rax
; CHECK-EX-NEXT: sarxq %rax, (%rdi), %rax
; CHECK-EX-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
@ -2694,16 +2684,11 @@ define void @rmw_fold_xor2(i64* %p, i64 %v) {
; Legal to reduce the load width (TODO)
define i32 @fold_trunc(i64* %p) {
; CHECK-NOX-LABEL: fold_trunc:
; CHECK-NOX: # %bb.0:
; CHECK-NOX-NEXT: movq (%rdi), %rax
; CHECK-NOX-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NOX-NEXT: retq
;
; CHECK-EX-LABEL: fold_trunc:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movl (%rdi), %eax
; CHECK-EX-NEXT: retq
; CHECK-LABEL: fold_trunc:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = trunc i64 %v to i32
ret i32 %ret
@ -2727,8 +2712,9 @@ define i32 @fold_trunc_add(i64* %p, i32 %v2) {
;
; CHECK-EX-LABEL: fold_trunc_add:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movl %esi, %eax
; CHECK-EX-NEXT: addl (%rdi), %eax
; CHECK-EX-NEXT: movq (%rdi), %rax
; CHECK-EX-NEXT: addl %esi, %eax
; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-EX-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%trunc = trunc i64 %v to i32
@ -2754,8 +2740,9 @@ define i32 @fold_trunc_and(i64* %p, i32 %v2) {
;
; CHECK-EX-LABEL: fold_trunc_and:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movl %esi, %eax
; CHECK-EX-NEXT: andl (%rdi), %eax
; CHECK-EX-NEXT: movq (%rdi), %rax
; CHECK-EX-NEXT: andl %esi, %eax
; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-EX-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%trunc = trunc i64 %v to i32
@ -2781,8 +2768,9 @@ define i32 @fold_trunc_or(i64* %p, i32 %v2) {
;
; CHECK-EX-LABEL: fold_trunc_or:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movl %esi, %eax
; CHECK-EX-NEXT: orl (%rdi), %eax
; CHECK-EX-NEXT: movq (%rdi), %rax
; CHECK-EX-NEXT: orl %esi, %eax
; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-EX-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%trunc = trunc i64 %v to i32
@ -2864,17 +2852,11 @@ define i64 @load_forwarding(i64* %p) {
; Legal to forward (TODO)
define i64 @store_forward(i64* %p, i64 %v) {
; CHECK-NOX-LABEL: store_forward:
; CHECK-NOX: # %bb.0:
; CHECK-NOX-NEXT: movq %rsi, (%rdi)
; CHECK-NOX-NEXT: movq (%rdi), %rax
; CHECK-NOX-NEXT: retq
;
; CHECK-EX-LABEL: store_forward:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movq %rsi, %rax
; CHECK-EX-NEXT: movq %rsi, (%rdi)
; CHECK-EX-NEXT: retq
; CHECK-LABEL: store_forward:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
store atomic i64 %v, i64* %p unordered, align 8
%ret = load atomic i64, i64* %p unordered, align 8
ret i64 %ret
@ -2894,16 +2876,11 @@ define void @dead_writeback(i64* %p) {
; Legal to kill (TODO)
define void @dead_store(i64* %p, i64 %v) {
; CHECK-NOX-LABEL: dead_store:
; CHECK-NOX: # %bb.0:
; CHECK-NOX-NEXT: movq $0, (%rdi)
; CHECK-NOX-NEXT: movq %rsi, (%rdi)
; CHECK-NOX-NEXT: retq
;
; CHECK-EX-LABEL: dead_store:
; CHECK-EX: # %bb.0:
; CHECK-EX-NEXT: movq %rsi, (%rdi)
; CHECK-EX-NEXT: retq
; CHECK-LABEL: dead_store:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $0, (%rdi)
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: retq
store atomic i64 0, i64* %p unordered, align 8
store atomic i64 %v, i64* %p unordered, align 8
ret void