[Power] Use AtomicExpandPass for fence insertion, and use lwsync where appropriate
Summary:
This patch makes use of AtomicExpandPass in Power for inserting fences around
atomics, as part of an effort to remove fence insertion from
SelectionDAGBuilder. As a big bonus, it lets us use sync 1 (lightweight sync,
often referred to by the mnemonic lwsync) instead of sync 0 (heavyweight sync)
in many cases. I also added a test, as there was no test for the barriers
emitted by the Power backend for atomic loads and stores.

Test Plan: new test + make check-all

Reviewers: jfb

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D5180

llvm-svn: 218331
parent a7da34c778
commit 64053dff5f
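For orientation, the ordering-to-barrier mapping that this patch encodes (taken from the cpp0xmappings table cited in the code below) can be summarized with a standalone sketch. The enum and function names here are illustrative, not the patch's code, and only loosely mirror LLVM's AtomicOrdering:

#include <cstdio>

// Standalone sketch of the barrier mapping this patch implements for Power,
// per http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html.
// This enum is illustrative; it only loosely mirrors LLVM's AtomicOrdering.
enum Ordering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };

// Barrier emitted before an atomic access; nullptr means none is needed.
const char *leadingFence(Ordering Ord) {
  if (Ord == SeqCst)
    return "sync 0"; // heavyweight sync (hwsync)
  if (Ord == Release || Ord == AcquireRelease)
    return "sync 1"; // lightweight sync (lwsync)
  return nullptr;
}

// Barrier emitted after the access; only loads (and the load half of
// read-modify-write operations) need one, since release stores are already
// ordered by their leading fence.
const char *trailingFence(Ordering Ord, bool IsLoad) {
  if (IsLoad && Ord != Monotonic && Ord != Release)
    return "sync 1"; // lwsync (conservative; a dependent branch + isync would do)
  return nullptr;
}

int main() {
  // A seq_cst load gets "sync 0" before and "sync 1" after:
  std::printf("%s / %s\n", leadingFence(SeqCst), trailingFence(SeqCst, true));
  return 0;
}

A seq_cst store, by contrast, gets only the leading sync 0, and monotonic or unordered accesses get no barrier at all; the new test at the bottom of this commit checks exactly these patterns.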
@@ -28,8 +28,10 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
   def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;

-  // sync instruction
+  // sync instruction (i.e. sync 0, a.k.a hwsync)
   def int_ppc_sync : Intrinsic<[], [], []>;
+  // lwsync is sync 1
+  def int_ppc_lwsync : Intrinsic<[], [], []>;

   // Intrinsics used to generate ctr-based loops. These should only be
   // generated by the PowerPC backend!
@@ -6538,6 +6538,38 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
 // Other Lowering Code
 //===----------------------------------------------------------------------===//

+static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *Func = Intrinsic::getDeclaration(M, Id);
+  return Builder.CreateCall(Func);
+}
+
+// The mappings for emitLeading/TrailingFence is taken from
+// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+                                                 AtomicOrdering Ord, bool IsStore,
+                                                 bool IsLoad) const {
+  if (Ord == SequentiallyConsistent)
+    return callIntrinsic(Builder, Intrinsic::ppc_sync);
+  else if (isAtLeastRelease(Ord))
+    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+  else
+    return nullptr;
+}
+
+Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+                                                  AtomicOrdering Ord, bool IsStore,
+                                                  bool IsLoad) const {
+  if (IsLoad && isAtLeastAcquire(Ord))
+    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
+  // FIXME: this is too conservative, a dependent branch + isync is enough.
+  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+  else
+    return nullptr;
+}
+
 MachineBasicBlock *
 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                     bool is64bit, unsigned BinOpcode) const {
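The removal of fence insertion from SelectionDAGBuilder works because AtomicExpandPass now drives the two hooks above at the IR level: it brackets each atomic instruction with the target's fences, after which the access itself only needs monotonic lowering. A simplified sketch of that bracketing follows; the helper name and exact control flow are assumptions (see lib/CodeGen/AtomicExpandPass.cpp for the real logic):

// Simplified sketch of how AtomicExpandPass uses the two hooks above; an
// illustration, not the pass's actual code.
#include "llvm/IR/IRBuilder.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static bool bracketInstWithFences(Instruction *I, AtomicOrdering Ord,
                                  bool IsStore, bool IsLoad,
                                  const TargetLowering *TLI) {
  IRBuilder<> Builder(I); // insert point: just before I
  Instruction *Leading = TLI->emitLeadingFence(Builder, Ord, IsStore, IsLoad);

  // An atomic load/store is never a terminator, so getNextNode() is non-null.
  Builder.SetInsertPoint(I->getNextNode()); // insert point: just after I
  Instruction *Trailing = TLI->emitTrailingFence(Builder, Ord, IsStore, IsLoad);

  return Leading || Trailing; // whether any IR was changed
}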
@@ -413,6 +413,11 @@ namespace llvm {
                                  const SelectionDAG &DAG,
                                  unsigned Depth = 0) const override;

+    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                                  bool IsStore, bool IsLoad) const override;
+    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                                   bool IsStore, bool IsLoad) const override;
+
     MachineBasicBlock *
     EmitInstrWithCustomInserter(MachineInstr *MI,
                                 MachineBasicBlock *MBB) const override;
@@ -1704,8 +1704,13 @@ let isCodeGenOnly = 1 in {
 }
 }

-def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
-def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+// FIXME: We should use a specific flag to check for the presence of the sync
+// instruction instead of relying on IsBookE.
+// For example, the PPC A2 supports SYNC despite being a BookE processor.
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[IsBookE]>;

 //===----------------------------------------------------------------------===//
 // PPC32 Arithmetic Instructions.
@@ -86,6 +86,7 @@ public:
     return *getPPCTargetMachine().getSubtargetImpl();
   }

+  void addIRPasses() override;
   bool addPreISel() override;
   bool addILPOpts() override;
   bool addInstSelector() override;
@@ -99,6 +100,11 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new PPCPassConfig(this, PM);
 }

+void PPCPassConfig::addIRPasses() {
+  addPass(createAtomicExpandPass(&getPPCTargetMachine()));
+  TargetPassConfig::addIRPasses();
+}
+
 bool PPCPassConfig::addPreISel() {
   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
     addPass(createPPCCTRLoops(getPPCTargetMachine()));
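Registering the pass in addIRPasses, ahead of TargetPassConfig::addIRPasses, means the fences already exist when the shared IR passes and instruction selection run. A hypothetical out-of-tree backend would wire the pass the same way; in this sketch FooPassConfig is an invented name, and only TargetPassConfig, createAtomicExpandPass, and the TM member are real LLVM pieces as of this commit's era:

// Sketch: a hypothetical backend registering AtomicExpandPass, mirroring the
// PPC hunk above. FooPassConfig is an invented name for illustration.
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
using namespace llvm;

class FooPassConfig : public TargetPassConfig {
public:
  FooPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  void addIRPasses() override {
    // Expand atomics (inserting target fences) before the shared IR passes.
    addPass(createAtomicExpandPass(TM));
    TargetPassConfig::addIRPasses();
  }
};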
test/CodeGen/PowerPC/atomics.ll (new file, 125 lines)
@@ -0,0 +1,125 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
+; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction).
+; This is already checked for in Atomics-64.ll
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64
+
+; FIXME: we don't currently check for the operations themselves with CHECK-NEXT,
+; because they are implemented in a very messy way with lwarx/stwcx.
+; It should be fixed soon in another patch.
+
+; We first check loads, for all sizes from i8 to i64.
+; We also vary orderings to check for barriers.
+define i8 @load_i8_unordered(i8* %mem) {
+; CHECK-LABEL: load_i8_unordered
+; CHECK-NOT: sync
+  %val = load atomic i8* %mem unordered, align 1
+  ret i8 %val
+}
+define i16 @load_i16_monotonic(i16* %mem) {
+; CHECK-LABEL: load_i16_monotonic
+; CHECK-NOT: sync
+  %val = load atomic i16* %mem monotonic, align 2
+  ret i16 %val
+}
+define i32 @load_i32_acquire(i32* %mem) {
+; CHECK-LABEL: load_i32_acquire
+  %val = load atomic i32* %mem acquire, align 4
+; CHECK: sync 1
+  ret i32 %val
+}
+define i64 @load_i64_seq_cst(i64* %mem) {
+; CHECK-LABEL: load_i64_seq_cst
+; CHECK: sync 0
+  %val = load atomic i64* %mem seq_cst, align 8
+; CHECK: sync 1
+  ret i64 %val
+}
+
+; Stores
+define void @store_i8_unordered(i8* %mem) {
+; CHECK-LABEL: store_i8_unordered
+; CHECK-NOT: sync
+  store atomic i8 42, i8* %mem unordered, align 1
+  ret void
+}
+define void @store_i16_monotonic(i16* %mem) {
+; CHECK-LABEL: store_i16_monotonic
+; CHECK-NOT: sync
+  store atomic i16 42, i16* %mem monotonic, align 2
+  ret void
+}
+define void @store_i32_release(i32* %mem) {
+; CHECK-LABEL: store_i32_release
+; CHECK: sync 1
+  store atomic i32 42, i32* %mem release, align 4
+  ret void
+}
+define void @store_i64_seq_cst(i64* %mem) {
+; CHECK-LABEL: store_i64_seq_cst
+; CHECK: sync 0
+  store atomic i64 42, i64* %mem seq_cst, align 8
+  ret void
+}
+
+; Atomic CmpXchg
+define i8 @cas_strong_i8_sc_sc(i8* %mem) {
+; CHECK-LABEL: cas_strong_i8_sc_sc
+; CHECK: sync 0
+  %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst
+; CHECK: sync 1
+  %loaded = extractvalue { i8, i1} %val, 0
+  ret i8 %loaded
+}
+define i16 @cas_weak_i16_acquire_acquire(i16* %mem) {
+; CHECK-LABEL: cas_weak_i16_acquire_acquire
+;CHECK-NOT: sync
+  %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire
+; CHECK: sync 1
+  %loaded = extractvalue { i16, i1} %val, 0
+  ret i16 %loaded
+}
+define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) {
+; CHECK-LABEL: cas_strong_i32_acqrel_acquire
+; CHECK: sync 1
+  %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire
+; CHECK: sync 1
+  %loaded = extractvalue { i32, i1} %val, 0
+  ret i32 %loaded
+}
+define i64 @cas_weak_i64_release_monotonic(i64* %mem) {
+; CHECK-LABEL: cas_weak_i64_release_monotonic
+; CHECK: sync 1
+  %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic
+; CHECK-NOT: [sync ]
+  %loaded = extractvalue { i64, i1} %val, 0
+  ret i64 %loaded
+}
+
+; AtomicRMW
+define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
+; CHECK-LABEL: add_i8_monotonic
+; CHECK-NOT: sync
+  %val = atomicrmw add i8* %mem, i8 %operand monotonic
+  ret i8 %val
+}
+define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
+; CHECK-LABEL: xor_i16_seq_cst
+; CHECK: sync 0
+  %val = atomicrmw xor i16* %mem, i16 %operand seq_cst
+; CHECK: sync 1
+  ret i16 %val
+}
+define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) {
+; CHECK-LABEL: xchg_i32_acq_rel
+; CHECK: sync 1
+  %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel
+; CHECK: sync 1
+  ret i32 %val
+}
+define i64 @and_i64_release(i64* %mem, i64 %operand) {
+; CHECK-LABEL: and_i64_release
+; CHECK: sync 1
+  %val = atomicrmw and i64* %mem, i64 %operand release
+; CHECK-NOT: [sync ]
+  ret i64 %val
+}