New approach to r136737: insert the necessary fences for atomic ops in platform-independent code, since a bunch of platforms (ARM, Mips, PPC, Alpha are the relevant targets here) need to do essentially the same thing.

I think this completes the basic CodeGen for atomicrmw and cmpxchg. llvm-svn: 136813
2024-11-24 03:33:20 +01:00 · 2011-08-03 21:06:02 +00:00 · 2011-08-03 21:06:02 +00:00 · afd08dcc2c
commit afd08dcc2c
parent 9db96fc81f
7 changed files with 94 additions and 86 deletions
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -714,6 +714,13 @@ public:
    return ShouldFoldAtomicFences;
  }
  /// getInsertFencesForAtomic - Return the InsertFencesForAtomic flag, i.e.
  /// whether the DAG builder should automatically insert fences and reduce
  /// ordering for atomics.
  ///
  bool getInsertFencesForAtomic() const {
    return InsertFencesForAtomic;
  }
  /// getPreIndexedAddressParts - returns true by value, base pointer and
  /// offset pointer and addressing mode by reference if the node's address
  /// can be legally represented as pre-indexed load / store address.
@ -1134,6 +1141,13 @@ protected:
    ShouldFoldAtomicFences = fold;
  }
  /// setInsertFencesForAtomic - Set whether the DAG builder should
  /// automatically insert fences and reduce the order of atomic memory
  /// operations to Monotonic.
  void setInsertFencesForAtomic(bool fence) {
    InsertFencesForAtomic = fence;
  }
 public:
  //===--------------------------------------------------------------------===//
  // Lowering methods - These methods must be implemented by targets so that
@ -1673,6 +1687,11 @@ private:
  /// combiner.
  bool ShouldFoldAtomicFences;
  /// InsertFencesForAtomic - Whether the DAG builder should automatically
  /// insert fences and reduce ordering for atomics.  (This will be set
  /// for most architectures with weak memory ordering.)
  bool InsertFencesForAtomic;
  /// StackPointerRegisterToSaveRestore - If set to a physical register, this
  /// specifies the register that llvm.savestack/llvm.restorestack should save
  /// and restore.
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -3237,22 +3237,59 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
  DAG.setRoot(StoreNode);
 }
 /// InsertFenceForAtomic - Optionally chain an ISD::ATOMIC_FENCE node onto
 /// Chain for an atomic operation with the given ordering.
 ///
 /// Before selects which side of the operation is being fenced.  When true
 /// (leading fence), Acquire and Monotonic need nothing and AcquireRelease is
 /// narrowed to Release.  When false (trailing fence), Release and Monotonic
 /// need nothing and AcquireRelease is narrowed to Acquire.  Every other
 /// ordering is passed through unchanged.
 ///
 /// Returns Chain itself when no fence is needed, otherwise the fence node.
 ///
 /// NOTE(review): operand 1 is always the SequentiallyConsistent constant and
 /// the narrowed ordering is placed in operand 2 -- confirm this matches the
 /// operand layout that ISD::ATOMIC_FENCE expects.
 static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
                                    bool Before, DebugLoc dl,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // The half of the ordering that is irrelevant on this side of the operation.
  const AtomicOrdering NotNeededHere = Before ? Acquire : Release;
  if (Order == Monotonic || Order == NotNeededHere)
    return Chain;

  // An AcquireRelease operation only needs the matching half on each side.
  if (Order == AcquireRelease)
    Order = Before ? Release : Acquire;

  SDValue FenceOps[3];
  FenceOps[0] = Chain;
  FenceOps[1] = DAG.getConstant(SequentiallyConsistent, TLI.getPointerTy());
  FenceOps[2] = DAG.getConstant(Order, TLI.getPointerTy());
  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, FenceOps, 3);
 }
 /// visitAtomicCmpXchg - Build the ISD::ATOMIC_CMP_SWAP node for a cmpxchg
 /// instruction, record it as the instruction's value and update the DAG root.
 /// When TLI.getInsertFencesForAtomic() is set, InsertFenceForAtomic is applied
 /// to the chain on both sides of the operation.
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
-  SDValue Root = getRoot();
+  DebugLoc dl = getCurDebugLoc();
  AtomicOrdering Order = I.getOrdering();
  SDValue InChain = getRoot();
  // Leading fence on the incoming chain (Before = true).
  if (TLI.getInsertFencesForAtomic())
    InChain = InsertFenceForAtomic(InChain, Order, true, dl, DAG, TLI);
  SDValue L =
-    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
                  getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
-                  Root,
+                  InChain,
                  getValue(I.getPointerOperand()),
                  getValue(I.getCompareOperand()),
                  getValue(I.getNewValOperand()),
                  MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
                  // NOTE(review): the instruction's original ordering is passed
                  // here even when fences are inserted, whereas visitAtomicRMW
                  // passes Monotonic in that case -- confirm this is intended.
                  I.getOrdering(), I.getSynchScope());
  // Value 1 of the atomic node is used as the outgoing chain.
  SDValue OutChain = L.getValue(1);
  // Trailing fence on the outgoing chain (Before = false).
  if (TLI.getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, false, dl, DAG, TLI);
  setValue(&I, L);
-  DAG.setRoot(L.getValue(1));
+  DAG.setRoot(OutChain);
 }
 /// visitAtomicRMW - Build the atomic read-modify-write node for an atomicrmw
 /// instruction, record it as the instruction's value and update the DAG root.
 /// When TLI.getInsertFencesForAtomic() is set, InsertFenceForAtomic is applied
 /// to the chain on both sides of the operation.
 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
  DebugLoc dl = getCurDebugLoc();
  ISD::NodeType NT;
  // Map the atomicrmw operation onto the matching ISD node type.
  switch (I.getOperation()) {
  default: llvm_unreachable("Unknown atomicrmw operation"); return;
@ -3268,16 +3305,30 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
  }
  AtomicOrdering Order = I.getOrdering();
  SDValue InChain = getRoot();
  // Leading fence on the incoming chain (Before = true).
  if (TLI.getInsertFencesForAtomic())
    InChain = InsertFenceForAtomic(InChain, Order, true, dl, DAG, TLI);
  SDValue L =
-    DAG.getAtomic(NT, getCurDebugLoc(),
+    DAG.getAtomic(NT, dl,
                  getValue(I.getValOperand()).getValueType().getSimpleVT(),
-                  getRoot(),
+                  InChain,
                  getValue(I.getPointerOperand()),
                  getValue(I.getValOperand()),
                  I.getPointerOperand(), 0 /* Alignment */,
                  // When fences are inserted around the node, the node itself
                  // is given Monotonic ordering; otherwise it keeps Order.
-                  I.getOrdering(), I.getSynchScope());
+                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
                  I.getSynchScope());
  // Value 1 of the atomic node is used as the outgoing chain.
  SDValue OutChain = L.getValue(1);
  // Trailing fence on the outgoing chain (Before = false).
  if (TLI.getInsertFencesForAtomic())
    OutChain = InsertFenceForAtomic(OutChain, Order, false, dl, DAG, TLI);
  setValue(&I, L);
-  DAG.setRoot(L.getValue(1));
+  DAG.setRoot(OutChain);
 }
 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@ -617,6 +617,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
  PrefLoopAlignment = 0;
  MinStackArgumentAlignment = 1;
  ShouldFoldAtomicFences = false;
  InsertFencesForAtomic = false;
  InitLibcallNames(LibcallRoutineNames);
  InitCmpLibcallCCs(CmpLibcallCCs);
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -602,18 +602,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);
+    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
-    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
+    setInsertFencesForAtomic(true);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
@ -2258,72 +2248,25 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
                     DAG.getConstant(DMBOpt, MVT::i32));
 }
-static SDValue getFence(SDValue InChain, DebugLoc dl, SelectionDAG &DAG,
+
-                        const ARMSubtarget *Subtarget) {
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const ARMSubtarget *Subtarget) {
  // Lowers a fence to an ARM barrier node chained on the incoming chain
  // (operand 0 of Op); which node is used depends on whether the subtarget
  // has a data barrier.
  // FIXME: handle "fence singlethread" more efficiently.
  DebugLoc dl = Op.getDebugLoc();
  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 cpus can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    // NOTE(review): the message names ISD::MEMBARRIER although this path
    // lowers ATOMIC_FENCE -- confirm whether the wording should be updated.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
-    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, InChain,
+    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, MVT::i32));
  }
  // Data barrier available: emit MEMBARRIER with the ARM_MB::ISH option.
-  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, InChain,
+  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(ARM_MB::ISH, MVT::i32));
 }
 /// LowerATOMIC_FENCE - Lower a fence node by handing its chain operand
 /// (operand 0) and its debug location to getFence.
 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const ARMSubtarget *Subtarget) {
  // FIXME: handle "fence singlethread" more efficiently.
  SDValue InChain = Op.getOperand(0);
  return getFence(InChain, Op.getDebugLoc(), DAG, Subtarget);
 }
 /// LowerAtomicMemOp - Rebuild an atomic node whose ordering is stronger than
 /// Monotonic as the same operation with Monotonic ordering, bracketed by
 /// fences from getFence as the original ordering requires.  Nodes whose
 /// ordering is Monotonic or weaker are returned unchanged.
 ///
 /// Returns the merged pair (value, chain) of the rebuilt operation.
 static SDValue LowerAtomicMemOp(SDValue Op, SelectionDAG &DAG,
                                const ARMSubtarget *Subtarget) {
  AtomicSDNode *AtomicN = cast<AtomicSDNode>(Op);
  int Order = AtomicN->getOrdering();
  if (Order <= Monotonic)
    return Op;

  DebugLoc dl = Op.getDebugLoc();
  // Orderings from AcquireRelease upwards are fenced on both sides.
  bool FenceBothSides = Order >= AcquireRelease;

  // Leading fence, if necessary
  SDValue InChain = Op.getOperand(0);
  if (Order == Release || FenceBothSides)
    InChain = getFence(InChain, dl, DAG, Subtarget);

  // Rather than mess with target-specific nodes, use the target-independent
  // node, and assume the DAGCombiner will not touch it post-legalize.
  // Compare-and-swap carries one more value operand than the other opcodes.
  SDValue OutVal =
    Op.getOpcode() == ISD::ATOMIC_CMP_SWAP
      ? DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, AtomicN->getMemoryVT(),
                      InChain, Op.getOperand(1), Op.getOperand(2),
                      Op.getOperand(3), AtomicN->getMemOperand(),
                      Monotonic, AtomicN->getSynchScope())
      : DAG.getAtomic(Op.getOpcode(), dl, AtomicN->getMemoryVT(),
                      InChain, Op.getOperand(1), Op.getOperand(2),
                      AtomicN->getMemOperand(),
                      Monotonic, AtomicN->getSynchScope());

  // Trailing fence, if necessary
  SDValue OutChain = OutVal.getValue(1);
  if (Order == Acquire || FenceBothSides)
    OutChain = getFence(OutChain, dl, DAG, Subtarget);

  SDValue Results[2] = { OutVal, OutChain };
  return DAG.getMergeValues(Results, 2, dl);
 }
 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                             const ARMSubtarget *Subtarget) {
  // ARM pre v5TE and Thumb1 does not have preload instructions.
@ -4882,18 +4825,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  case ISD::VASTART:       return LowerVASTART(Op, DAG);
  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
  case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_NAND:
  case ISD::ATOMIC_LOAD_MIN:
  case ISD::ATOMIC_LOAD_MAX:
  case ISD::ATOMIC_LOAD_UMIN:
  case ISD::ATOMIC_LOAD_UMAX: return LowerAtomicMemOp(Op, DAG, Subtarget);
  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@ -160,6 +160,8 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
  setMinFunctionAlignment(4);
  setInsertFencesForAtomic(true);
  computeRegisterProperties();
 }
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@ -164,6 +164,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
  setOperationAction(ISD::MEMBARRIER,        MVT::Other, Custom);
  setOperationAction(ISD::ATOMIC_FENCE,      MVT::Other, Custom);  
  setInsertFencesForAtomic(true);
  if (Subtarget->isSingleFloat())
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -401,6 +401,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);
  setInsertFencesForAtomic(true);
  computeRegisterProperties();
 }