1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[VE] Support atomic fence

Support atomic fence instruction and add a regression test.
Also add a MEMBARRIER pseudo instruction to use as a barrier
against compiler optimizations.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D90112
This commit is contained in:
Kazushi (Jam) Marukawa 2020-10-25 11:11:49 +09:00
parent fba008b8ac
commit 75e202d847
4 changed files with 134 additions and 0 deletions

View File

@ -817,6 +817,17 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
/// } Floating-point math functions
/// Atomic instructions {
setMaxAtomicSizeInBitsSupported(64);
setMinCmpXchgSizeInBits(32);
setSupportsUnalignedAtomics(false);
// Use custom lowering for ATOMIC_FENCE (see lowerATOMIC_FENCE).
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
/// } Atomic instructions
setStackPointerRegisterToSaveRestore(VE::SX11);
// We have target-specific dag combine patterns for the following nodes:
@ -843,6 +854,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(GETFUNPLT)
TARGET_NODE_CASE(GETSTACKTOP)
TARGET_NODE_CASE(GETTLSADDR)
TARGET_NODE_CASE(MEMBARRIER)
TARGET_NODE_CASE(CALL)
TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(GLOBAL_BASE_REG)
@ -945,6 +957,51 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
/// Custom Lower {
/// Custom-lower an ISD::ATOMIC_FENCE node.
///
/// Operand 0 of \p Op is the incoming chain, operand 1 is read as the
/// AtomicOrdering and operand 2 as the SyncScope::ID of the fence.
///
/// A system-scope fence whose ordering is acquire, release, acq_rel or
/// seq_cst is turned into a VE::FENCEM machine node (immediate 2, 1, 3 and 3
/// respectively). Every other fence is turned into a VEISD::MEMBARRIER node,
/// which only acts as a compiler barrier.
///
/// \returns the chain (MVT::Other) result of the node that was created.
SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto Ordering = static_cast<AtomicOrdering>(
      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  const auto Scope = static_cast<SyncScope::ID>(
      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // Immediate operand for "fencem"; zero means that no fencem is required.
  unsigned FenceKind = 0;

  // VE uses release consistency, so only a cross-thread (system scope) fence
  // needs a real fence instruction.
  if (Scope == SyncScope::System) {
    switch (Ordering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // These orderings do not need a fencem instruction.
      break;
    case AtomicOrdering::Acquire:
      // "fencem 2" acts as the acquire fence.
      FenceKind = 2;
      break;
    case AtomicOrdering::Release:
      // "fencem 1" acts as the release fence.
      FenceKind = 1;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // "fencem 3" acts as both the acq_rel and the seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for PCIe device accesses, so seq_cst
      //        may require more instructions for them.
      FenceKind = 3;
      break;
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  if (FenceKind == 0)
    return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return SDValue(DAG.getMachineNode(
                     VE::FENCEM, DL, MVT::Other,
                     DAG.getTargetConstant(FenceKind, DL, MVT::i32),
                     Op.getOperand(0)),
                 0);
}
SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
@ -1263,6 +1320,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
llvm_unreachable("Should not custom lower this!");
case ISD::ATOMIC_FENCE:
return lowerATOMIC_FENCE(Op, DAG);
case ISD::BlockAddress:
return lowerBlockAddress(Op, DAG);
case ISD::ConstantPool:

View File

@ -32,6 +32,8 @@ enum NodeType : unsigned {
GETSTACKTOP, // retrieve address of stack top (first address of
// locals and temporaries)
MEMBARRIER, // Compiler barrier only; generate a no-op.
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
GLOBAL_BASE_REG, // Global base reg for PIC.
@ -77,6 +79,7 @@ public:
/// Custom Lower {
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

View File

@ -442,6 +442,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
// MEMBARRIER
// SelectionDAG node definition bound to VEISD::MEMBARRIER. It uses the
// SDTNone type profile and carries the SDNPHasChain and SDNPSideEffect
// properties. lowerATOMIC_FENCE creates this node for fences that do not
// need a fencem instruction.
def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
//===----------------------------------------------------------------------===//
// VE Flag Conditions
@ -1782,6 +1785,14 @@ def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
"# GET STACK TOP",
[(set iPTR:$dst, (GetStackTop))]>;
// MEMBARRIER
// Pseudo instruction selected from the MemBarrier node. It has empty outs and
// ins lists and its assembly string is "# MEMBARRIER". hasSideEffects is set
// so that it can serve as a barrier against compiler optimizations.
let hasSideEffects = 1 in
def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >;
//===----------------------------------------------------------------------===//
// Other patterns
//===----------------------------------------------------------------------===//
// SETCC pattern matches
//
// CMP %tmp, lhs, rhs ; compare lhs and rhs

View File

@ -0,0 +1,61 @@
; RUN: llc < %s -mtriple=ve | FileCheck %s
;;; Test atomic fence for all memory orders
;;; Relaxed case: the body below contains no fence instruction, so the
;;; expected output has no fencem between the .LBB label and the "or".
; Function Attrs: norecurse nounwind readnone
define void @_Z20atomic_fence_relaxedv() {
; CHECK-LABEL: _Z20atomic_fence_relaxedv:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: or %s11, 0, %s9
ret void
}
;;; Consume case: the body below uses "fence acquire", so "fencem 2" is
;;; expected, exactly as in the acquire test.
;;; NOTE(review): presumably consume is expressed as acquire because LLVM IR
;;; has no separate consume ordering -- confirm.
; Function Attrs: nofree norecurse nounwind
define void @_Z20atomic_fence_consumev() {
; CHECK-LABEL: _Z20atomic_fence_consumev:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: fencem 2
; CHECK-NEXT: or %s11, 0, %s9
fence acquire
ret void
}
;;; Acquire case: "fence acquire" is expected to be emitted as "fencem 2".
; Function Attrs: nofree norecurse nounwind
define void @_Z20atomic_fence_acquirev() {
; CHECK-LABEL: _Z20atomic_fence_acquirev:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: fencem 2
; CHECK-NEXT: or %s11, 0, %s9
fence acquire
ret void
}
;;; Release case: "fence release" is expected to be emitted as "fencem 1".
; Function Attrs: nofree norecurse nounwind
define void @_Z20atomic_fence_releasev() {
; CHECK-LABEL: _Z20atomic_fence_releasev:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: fencem 1
; CHECK-NEXT: or %s11, 0, %s9
fence release
ret void
}
;;; Acq_rel case: "fence acq_rel" is expected to be emitted as "fencem 3".
; Function Attrs: nofree norecurse nounwind
define void @_Z20atomic_fence_acq_relv() {
; CHECK-LABEL: _Z20atomic_fence_acq_relv:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: fencem 3
; CHECK-NEXT: or %s11, 0, %s9
fence acq_rel
ret void
}
;;; Seq_cst case: "fence seq_cst" is expected to be emitted as "fencem 3",
;;; the same instruction as for acq_rel.
; Function Attrs: nofree norecurse nounwind
define void @_Z20atomic_fence_seq_cstv() {
; CHECK-LABEL: _Z20atomic_fence_seq_cstv:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: fencem 3
; CHECK-NEXT: or %s11, 0, %s9
fence seq_cst
ret void
}