mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[VE] Support atomic fence
Support the atomic fence instruction and add a regression test. Also add a MEMBARRIER pseudo instruction to use as a barrier against compiler optimizations. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D90112
This commit is contained in:
parent
fba008b8ac
commit
75e202d847
@ -817,6 +817,17 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
|
||||
|
||||
/// } Floating-point math functions
|
||||
|
||||
/// Atomic instructions {
|
||||
|
||||
setMaxAtomicSizeInBitsSupported(64);
|
||||
setMinCmpXchgSizeInBits(32);
|
||||
setSupportsUnalignedAtomics(false);
|
||||
|
||||
// Use custom lowering for ATOMIC_FENCE.
|
||||
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
|
||||
|
||||
/// } Atomic instructions
|
||||
|
||||
setStackPointerRegisterToSaveRestore(VE::SX11);
|
||||
|
||||
// We have target-specific dag combine patterns for the following nodes:
|
||||
@ -843,6 +854,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
TARGET_NODE_CASE(GETFUNPLT)
|
||||
TARGET_NODE_CASE(GETSTACKTOP)
|
||||
TARGET_NODE_CASE(GETTLSADDR)
|
||||
TARGET_NODE_CASE(MEMBARRIER)
|
||||
TARGET_NODE_CASE(CALL)
|
||||
TARGET_NODE_CASE(RET_FLAG)
|
||||
TARGET_NODE_CASE(GLOBAL_BASE_REG)
|
||||
@ -945,6 +957,51 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
/// Custom Lower {
|
||||
|
||||
/// Lower an ISD::ATOMIC_FENCE node.
///
/// Operand 1 of \p Op carries the atomic ordering and operand 2 the
/// synchronization scope; operand 0 is forwarded unchanged to the node that
/// replaces the fence.
///
/// For a system-scope fence with acquire, release, acq_rel or seq_cst
/// ordering a VE::FENCEM machine node is produced with immediate 2, 1, 3 and
/// 3 respectively. Every other combination yields a VEISD::MEMBARRIER node,
/// which only acts as a compiler barrier.
SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto Ordering = static_cast<AtomicOrdering>(
      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  const auto Scope = static_cast<SyncScope::ID>(
      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // Immediate for the "fencem" instruction; 0 means that no fencem is needed.
  unsigned FenceKind = 0;

  // VE uses Release consistency, so a fence instruction is needed when this
  // is a cross-thread fence.
  if (Scope == SyncScope::System) {
    switch (Ordering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No fencem instruction is required for these orderings.
      break;
    case AtomicOrdering::Acquire:
      // "fencem 2" serves as the acquire fence.
      FenceKind = 2;
      break;
    case AtomicOrdering::Release:
      // "fencem 1" serves as the release fence.
      FenceKind = 1;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // "fencem 3" serves as both the acq_rel and the seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for PCIe device accesses, so seq_cst
      //        may require more instructions for them.
      FenceKind = 3;
      break;
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  if (FenceKind == 0)
    return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Chain);

  SDValue Imm = DAG.getTargetConstant(FenceKind, DL, MVT::i32);
  return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other, Imm, Chain),
                 0);
}
|
||||
|
||||
SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
return makeAddress(Op, DAG);
|
||||
@ -1263,6 +1320,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
switch (Op.getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Should not custom lower this!");
|
||||
case ISD::ATOMIC_FENCE:
|
||||
return lowerATOMIC_FENCE(Op, DAG);
|
||||
case ISD::BlockAddress:
|
||||
return lowerBlockAddress(Op, DAG);
|
||||
case ISD::ConstantPool:
|
||||
|
@ -32,6 +32,8 @@ enum NodeType : unsigned {
|
||||
GETSTACKTOP, // retrieve address of stack top (first address of
|
||||
// locals and temporaries)
|
||||
|
||||
MEMBARRIER, // Compiler barrier only; generate a no-op.
|
||||
|
||||
CALL, // A call instruction.
|
||||
RET_FLAG, // Return with a flag operand.
|
||||
GLOBAL_BASE_REG, // Global base reg for PIC.
|
||||
@ -77,6 +79,7 @@ public:
|
||||
/// Custom Lower {
|
||||
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
|
||||
|
||||
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -442,6 +442,9 @@ def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
|
||||
def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
|
||||
[SDNPHasChain, SDNPSideEffect]>;
|
||||
|
||||
// MEMBARRIER - chained, side-effecting compiler barrier; emits no instruction.
|
||||
def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
|
||||
[SDNPHasChain, SDNPSideEffect]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VE Flag Conditions
|
||||
@ -1782,6 +1785,14 @@ def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
|
||||
"# GET STACK TOP",
|
||||
[(set iPTR:$dst, (GetStackTop))]>;
|
||||
|
||||
// MEMBARRIER - pseudo selected from MemBarrier; no operands and no results.
|
||||
let hasSideEffects = 1 in
|
||||
def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Other patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// SETCC pattern matches
|
||||
//
|
||||
// CMP %tmp, lhs, rhs ; compare lhs and rhs
|
||||
|
61
test/CodeGen/VE/atomic_fence.ll
Normal file
61
test/CodeGen/VE/atomic_fence.ll
Normal file
@ -0,0 +1,61 @@
|
||||
; RUN: llc < %s -mtriple=ve | FileCheck %s
|
||||
|
||||
;;; Test atomic fence for all memory orders
|
||||
|
||||
; The body contains no fence instruction, so `or %s11, 0, %s9` is expected
; immediately after the block label, with no fencem in between.
; Function Attrs: norecurse nounwind readnone
|
||||
define void @_Z20atomic_fence_relaxedv() {
|
||||
; CHECK-LABEL: _Z20atomic_fence_relaxedv:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
ret void
|
||||
}
|
||||
|
||||
; Expects `fencem 2` for the fence below.
; NOTE(review): the body uses `fence acquire` although the function is named
; "consume"; presumably the front end emitted acquire for a consume fence,
; since LLVM IR has no consume fence ordering -- confirm.
; Function Attrs: nofree norecurse nounwind
|
||||
define void @_Z20atomic_fence_consumev() {
|
||||
; CHECK-LABEL: _Z20atomic_fence_consumev:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: fencem 2
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
fence acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; A `fence acquire` is expected to produce `fencem 2`.
; Function Attrs: nofree norecurse nounwind
|
||||
define void @_Z20atomic_fence_acquirev() {
|
||||
; CHECK-LABEL: _Z20atomic_fence_acquirev:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: fencem 2
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
fence acquire
|
||||
ret void
|
||||
}
|
||||
|
||||
; A `fence release` is expected to produce `fencem 1`.
; Function Attrs: nofree norecurse nounwind
|
||||
define void @_Z20atomic_fence_releasev() {
|
||||
; CHECK-LABEL: _Z20atomic_fence_releasev:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: fencem 1
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
fence release
|
||||
ret void
|
||||
}
|
||||
|
||||
; A `fence acq_rel` is expected to produce `fencem 3`.
; Function Attrs: nofree norecurse nounwind
|
||||
define void @_Z20atomic_fence_acq_relv() {
|
||||
; CHECK-LABEL: _Z20atomic_fence_acq_relv:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: fencem 3
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
fence acq_rel
|
||||
ret void
|
||||
}
|
||||
|
||||
; A `fence seq_cst` is expected to produce `fencem 3`, the same as acq_rel.
; Function Attrs: nofree norecurse nounwind
|
||||
define void @_Z20atomic_fence_seq_cstv() {
|
||||
; CHECK-LABEL: _Z20atomic_fence_seq_cstv:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: fencem 3
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
fence seq_cst
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user