From 75e202d84782cd7a7f5d3aea18821120edc37098 Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa"
Date: Sun, 25 Oct 2020 11:11:49 +0900
Subject: [PATCH] [VE] Support atomic fence

Support the atomic fence instruction and add a regression test.
Also add a MEMBARRIER pseudo instruction to use as a barrier against
compiler optimizations.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90112
---
 lib/Target/VE/VEISelLowering.cpp | 59 ++++++++++++++++++++++++++++++
 lib/Target/VE/VEISelLowering.h   |  3 ++
 lib/Target/VE/VEInstrInfo.td     | 11 ++++++
 test/CodeGen/VE/atomic_fence.ll  | 61 ++++++++++++++++++++++++++++++++
 4 files changed, 134 insertions(+)
 create mode 100644 test/CodeGen/VE/atomic_fence.ll

diff --git a/lib/Target/VE/VEISelLowering.cpp b/lib/Target/VE/VEISelLowering.cpp
index 150db3ab644..115540bb862 100644
--- a/lib/Target/VE/VEISelLowering.cpp
+++ b/lib/Target/VE/VEISelLowering.cpp
@@ -817,6 +817,17 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
 
   /// } Floating-point math functions
 
+  /// Atomic instructions {
+
+  setMaxAtomicSizeInBitsSupported(64);
+  setMinCmpXchgSizeInBits(32);
+  setSupportsUnalignedAtomics(false);
+
+  // Use custom inserter for ATOMIC_FENCE.
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  /// } Atomic instructions
+
   setStackPointerRegisterToSaveRestore(VE::SX11);
 
   // We have target-specific dag combine patterns for the following nodes:
@@ -843,6 +854,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
     TARGET_NODE_CASE(GETFUNPLT)
     TARGET_NODE_CASE(GETSTACKTOP)
     TARGET_NODE_CASE(GETTLSADDR)
+    TARGET_NODE_CASE(MEMBARRIER)
     TARGET_NODE_CASE(CALL)
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(GLOBAL_BASE_REG)
@@ -945,6 +957,51 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
 
 /// Custom Lower {
 
+SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
+      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // VE uses Release consistency, so we need a fence instruction if this is a
+  // cross-thread fence.
+  if (FenceSSID == SyncScope::System) {
+    switch (FenceOrdering) {
+    case AtomicOrdering::NotAtomic:
+    case AtomicOrdering::Unordered:
+    case AtomicOrdering::Monotonic:
+      // No need to generate a fencem instruction here.
+      break;
+    case AtomicOrdering::Acquire:
+      // Generate "fencem 2" as an acquire fence.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(2, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    case AtomicOrdering::Release:
+      // Generate "fencem 1" as a release fence.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(1, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    case AtomicOrdering::AcquireRelease:
+    case AtomicOrdering::SequentiallyConsistent:
+      // Generate "fencem 3" as an acq_rel and seq_cst fence.
+      // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
+      //        so seq_cst may require more instructions for them.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(3, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    }
+  }
+
+  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+  return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
   return makeAddress(Op, DAG);
@@ -1263,6 +1320,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default:
     llvm_unreachable("Should not custom lower this!");
+  case ISD::ATOMIC_FENCE:
+    return lowerATOMIC_FENCE(Op, DAG);
   case ISD::BlockAddress:
     return lowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:
diff --git a/lib/Target/VE/VEISelLowering.h b/lib/Target/VE/VEISelLowering.h
index 19c739eb8b4..d0fea010759 100644
--- a/lib/Target/VE/VEISelLowering.h
+++ b/lib/Target/VE/VEISelLowering.h
@@ -32,6 +32,8 @@ enum NodeType : unsigned {
   GETSTACKTOP,     // retrieve address of stack top (first address of
                    // locals and temporaries)
 
+  MEMBARRIER,      // Compiler barrier only; generate a no-op.
+
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
   GLOBAL_BASE_REG, // Global base reg for PIC.
@@ -77,6 +79,7 @@ public:
   /// Custom Lower {
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
+  SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/VE/VEInstrInfo.td b/lib/Target/VE/VEInstrInfo.td
index 65dbb68ce17..05e2a86e4d2 100644
--- a/lib/Target/VE/VEInstrInfo.td
+++ b/lib/Target/VE/VEInstrInfo.td
@@ -442,6 +442,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
 
 def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
                          [SDNPHasChain, SDNPSideEffect]>;
+// MEMBARRIER
+def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
+                        [SDNPHasChain, SDNPSideEffect]>;
 
 //===----------------------------------------------------------------------===//
 // VE Flag Conditions
@@ -1782,6 +1785,14 @@ def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
                          "# GET STACK TOP",
                          [(set iPTR:$dst, (GetStackTop))]>;
 
+// MEMBARRIER
+let hasSideEffects = 1 in
+def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)]>;
+
+//===----------------------------------------------------------------------===//
+// Other patterns
+//===----------------------------------------------------------------------===//
+
 // SETCC pattern matches
 //
 //   CMP %tmp, lhs, rhs   ; compare lhs and rhs
diff --git a/test/CodeGen/VE/atomic_fence.ll b/test/CodeGen/VE/atomic_fence.ll
new file mode 100644
index 00000000000..0835e6b6e53
--- /dev/null
+++ b/test/CodeGen/VE/atomic_fence.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test atomic fence for all memory orders
+
+; Function Attrs: norecurse nounwind readnone
+define void @_Z20atomic_fence_relaxedv() {
+; CHECK-LABEL: _Z20atomic_fence_relaxedv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s11, 0, %s9
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_consumev() {
+; CHECK-LABEL: _Z20atomic_fence_consumev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence acquire
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_acquirev() {
+; CHECK-LABEL: _Z20atomic_fence_acquirev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 2
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence acquire
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_releasev() {
+; CHECK-LABEL: _Z20atomic_fence_releasev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 1
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence release
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_acq_relv() {
+; CHECK-LABEL: _Z20atomic_fence_acq_relv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence acq_rel
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_seq_cstv() {
+; CHECK-LABEL: _Z20atomic_fence_seq_cstv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    fencem 3
+; CHECK-NEXT:    or %s11, 0, %s9
+  fence seq_cst
+  ret void
+}
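
Note (illustration, not part of the patch): the mangled names in the test demangle to
plain functions such as atomic_fence_acquire(), so the IR above could have been reduced
from C++ roughly like the sketch below. The function names come from the demangled
symbols; everything else is an assumption, though std::atomic_thread_fence is the
standard API. Clang emits no "fence" IR for the relaxed call and strengthens
memory_order_consume to acquire, which is why the consume test also checks "fencem 2".

#include <atomic>

// Hypothetical source for the regression test above; each helper issues a
// stand-alone fence with one C++ memory order.
void atomic_fence_relaxed() { std::atomic_thread_fence(std::memory_order_relaxed); }
void atomic_fence_consume() { std::atomic_thread_fence(std::memory_order_consume); }
void atomic_fence_acquire() { std::atomic_thread_fence(std::memory_order_acquire); }
void atomic_fence_release() { std::atomic_thread_fence(std::memory_order_release); }
void atomic_fence_acq_rel() { std::atomic_thread_fence(std::memory_order_acq_rel); }
void atomic_fence_seq_cst() { std::atomic_thread_fence(std::memory_order_seq_cst); }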