diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 3484a79c737..8daec8ee48e 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -714,6 +714,13 @@ public: return ShouldFoldAtomicFences; } + /// getInsertFencesForAtomic - return whether the DAG builder should automatically + /// insert fences and reduce ordering for atomics. + /// + bool getInsertFencesForAtomic() const { + return InsertFencesForAtomic; + } + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -1134,6 +1141,13 @@ protected: ShouldFoldAtomicFences = fold; } + /// setInsertFencesForAtomic - Set if the DAG builder should + /// automatically insert fences and reduce the order of atomic memory + /// operations to Monotonic. + void setInsertFencesForAtomic(bool fence) { + InsertFencesForAtomic = fence; + } + public: //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that @@ -1673,6 +1687,11 @@ private: /// combiner. bool ShouldFoldAtomicFences; + /// InsertFencesForAtomic - Whether the DAG builder should automatically + /// insert fences and reduce ordering for atomics. (This will be set + /// for most architectures with weak memory ordering.) + bool InsertFencesForAtomic; + /// StackPointerRegisterToSaveRestore - If set to a physical register, this /// specifies the register that llvm.savestack/llvm.restorestack should save /// and restore. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 449f87ebdab..bedc310dbbb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3237,22 +3237,60 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } +static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, + bool Before, DebugLoc dl, + SelectionDAG &DAG, + const TargetLowering &TLI) { + // Fence, if necessary + if (Before) { + if (Order == AcquireRelease) + Order = Release; + else if (Order == Acquire || Order == Monotonic) + return Chain; + } else { + if (Order == AcquireRelease) + Order = Acquire; + else if (Order == Release || Order == Monotonic) + return Chain; + } + SDValue Ops[3]; + Ops[0] = Chain; + Ops[1] = DAG.getConstant(SequentiallyConsistent, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(Order, TLI.getPointerTy()); + return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { - SDValue Root = getRoot(); + DebugLoc dl = getCurDebugLoc(); + AtomicOrdering Order = I.getOrdering(); + + SDValue InChain = getRoot(); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, true, dl, DAG, TLI); + SDValue L = - DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, getValue(I.getCompareOperand()).getValueType().getSimpleVT(), - Root, + InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - I.getOrdering(), I.getSynchScope()); + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + I.getSynchScope()); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, false, dl, DAG, TLI); + setValue(&I, L); - DAG.setRoot(L.getValue(1)); + DAG.setRoot(OutChain); } void 
SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { + DebugLoc dl = getCurDebugLoc(); ISD::NodeType NT; switch (I.getOperation()) { default: llvm_unreachable("Unknown atomicrmw operation"); return; @@ -3268,16 +3305,30 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } + AtomicOrdering Order = I.getOrdering(); + + SDValue InChain = getRoot(); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, true, dl, DAG, TLI); + SDValue L = - DAG.getAtomic(NT, getCurDebugLoc(), + DAG.getAtomic(NT, dl, getValue(I.getValOperand()).getValueType().getSimpleVT(), - getRoot(), + InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), 0 /* Alignment */, - I.getOrdering(), I.getSynchScope()); + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + I.getSynchScope()); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, false, dl, DAG, TLI); + setValue(&I, L); - DAG.setRoot(L.getValue(1)); + DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 335eca787ff..49b0f1b8ef1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -617,6 +617,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; ShouldFoldAtomicFences = false; + InsertFencesForAtomic = false; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ed5e3ab6ca8..60de3e9b1dd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -602,18 
+602,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // normally. setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); + // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. + setInsertFencesForAtomic(true); } else { // Set them all for expansion, which will force libcalls. setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); @@ -2258,72 +2248,25 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DAG.getConstant(DMBOpt, MVT::i32)); } -static SDValue getFence(SDValue InChain, DebugLoc dl, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { + +static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + // FIXME: handle "fence singlethread" more efficiently. + DebugLoc dl = Op.getDebugLoc(); if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && "Unexpected ISD::MEMBARRIER encountered. 
Should be libcall!"); - return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, InChain, + return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); } - return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, InChain, + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(ARM_MB::ISH, MVT::i32)); } -static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { - // FIXME: handle "fence singlethread" more efficiently. - DebugLoc dl = Op.getDebugLoc(); - return getFence(Op.getOperand(0), dl, DAG, Subtarget); -} - -static SDValue LowerAtomicMemOp(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { - DebugLoc dl = Op.getDebugLoc(); - int Order = cast(Op)->getOrdering(); - if (Order <= Monotonic) - return Op; - - SDValue InChain = Op.getOperand(0); - - // Fence, if necessary - if (Order == Release || Order >= AcquireRelease) - InChain = getFence(InChain, dl, DAG, Subtarget); - - // Rather than mess with target-specific nodes, use the target-indepedent - // node, and assume the DAGCombiner will not touch it post-legalize. 
- SDValue OutVal; - if (Op.getOpcode() == ISD::ATOMIC_CMP_SWAP) - OutVal = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - cast(Op)->getMemoryVT(), - InChain, Op.getOperand(1), Op.getOperand(2), - Op.getOperand(3), - cast(Op)->getMemOperand(), - Monotonic, - cast(Op)->getSynchScope()); - else - OutVal = DAG.getAtomic(Op.getOpcode(), dl, - cast(Op)->getMemoryVT(), - InChain, Op.getOperand(1), Op.getOperand(2), - cast(Op)->getMemOperand(), - Monotonic, - cast(Op)->getSynchScope()); - - SDValue OutChain = OutVal.getValue(1); - - // Fence, if necessary - if (Order == Acquire || Order >= AcquireRelease) - OutChain = getFence(OutChain, dl, DAG, Subtarget); - - SDValue Ops[2] = { OutVal, OutChain }; - return DAG.getMergeValues(Ops, 2, dl); -} - - static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // ARM pre v5TE and Thumb1 does not have preload instructions. @@ -4882,18 +4825,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: return LowerAtomicMemOp(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index de003fb4c65..8fb63400304 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -160,6 +160,8 @@ 
AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setMinFunctionAlignment(4); + setInsertFencesForAtomic(true); + computeRegisterProperties(); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 0dac7897f59..54fa2d40b01 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -164,6 +164,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setInsertFencesForAtomic(true); + if (Subtarget->isSingleFloat()) setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index ff4f1997cca..78a75f96816 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -401,6 +401,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); + setInsertFencesForAtomic(true); + computeRegisterProperties(); }