1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00

[Power] Use AtomicExpandPass for fence insertion, and use lwsync where appropriate

Summary:
This patch makes use of AtomicExpandPass in Power for inserting fences around
atomic as part of an effort to remove fence insertion from SelectionDAGBuilder.
As a big bonus, it lets us use sync 1 (lightweight sync, often used by the mnemonic
lwsync) instead of sync 0 (heavyweight sync) in many cases.

I also added a test, as there was no test for the barriers emitted by the Power
backend for atomic loads and stores.

Test Plan: new test + make check-all

Reviewers: jfb

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D5180

llvm-svn: 218331
This commit is contained in:
Robin Morisset 2014-09-23 20:46:49 +00:00
parent a7da34c778
commit 64053dff5f
6 changed files with 178 additions and 3 deletions

View File

@ -28,8 +28,10 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
// sync instruction
// sync instruction (i.e. sync 0, a.k.a hwsync)
def int_ppc_sync : Intrinsic<[], [], []>;
// lwsync is sync 1
def int_ppc_lwsync : Intrinsic<[], [], []>;
// Intrinsics used to generate ctr-based loops. These should only be
// generated by the PowerPC backend!

View File

@ -6538,6 +6538,38 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
// Other Lowering Code
//===----------------------------------------------------------------------===//
static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Function *Func = Intrinsic::getDeclaration(M, Id);
return Builder.CreateCall(Func);
}
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
AtomicOrdering Ord, bool IsStore,
bool IsLoad) const {
if (Ord == SequentiallyConsistent)
return callIntrinsic(Builder, Intrinsic::ppc_sync);
else if (isAtLeastRelease(Ord))
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
else
return nullptr;
}
Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
AtomicOrdering Ord, bool IsStore,
bool IsLoad) const {
if (IsLoad && isAtLeastAcquire(Ord))
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
// FIXME: this is too conservative, a dependent branch + isync is enough.
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
else
return nullptr;
}
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
bool is64bit, unsigned BinOpcode) const {

View File

@ -413,6 +413,11 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
bool IsStore, bool IsLoad) const override;
Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
bool IsStore, bool IsLoad) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;

View File

@ -1704,8 +1704,13 @@ let isCodeGenOnly = 1 in {
}
}
def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
// FIXME: We should use a specific flag to check for the presence of the sync
// instruction instead of relying on IsBookE.
// For example, the PPC A2 supports SYNC despite being a BookE processor.
def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[IsNotBookE]>;
def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[IsBookE]>;
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.

View File

@ -86,6 +86,7 @@ public:
return *getPPCTargetMachine().getSubtargetImpl();
}
void addIRPasses() override;
bool addPreISel() override;
bool addILPOpts() override;
bool addInstSelector() override;
@ -99,6 +100,11 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
return new PPCPassConfig(this, PM);
}
void PPCPassConfig::addIRPasses() {
addPass(createAtomicExpandPass(&getPPCTargetMachine()));
TargetPassConfig::addIRPasses();
}
bool PPCPassConfig::addPreISel() {
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createPPCCTRLoops(getPPCTargetMachine()));

View File

@ -0,0 +1,125 @@
; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction).
; This is already checked for in Atomics-64.ll
; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64
; FIXME: we don't currently check for the operations themselves with CHECK-NEXT,
; because they are implemented in a very messy way with lwarx/stwcx.
; It should be fixed soon in another patch.
; We first check loads, for all sizes from i8 to i64.
; We also vary orderings to check for barriers.
define i8 @load_i8_unordered(i8* %mem) {
; CHECK-LABEL: load_i8_unordered
; CHECK-NOT: sync
%val = load atomic i8* %mem unordered, align 1
ret i8 %val
}
define i16 @load_i16_monotonic(i16* %mem) {
; CHECK-LABEL: load_i16_monotonic
; CHECK-NOT: sync
%val = load atomic i16* %mem monotonic, align 2
ret i16 %val
}
define i32 @load_i32_acquire(i32* %mem) {
; CHECK-LABEL: load_i32_acquire
%val = load atomic i32* %mem acquire, align 4
; CHECK: sync 1
ret i32 %val
}
define i64 @load_i64_seq_cst(i64* %mem) {
; CHECK-LABEL: load_i64_seq_cst
; CHECK: sync 0
%val = load atomic i64* %mem seq_cst, align 8
; CHECK: sync 1
ret i64 %val
}
; Stores
define void @store_i8_unordered(i8* %mem) {
; CHECK-LABEL: store_i8_unordered
; CHECK-NOT: sync
store atomic i8 42, i8* %mem unordered, align 1
ret void
}
define void @store_i16_monotonic(i16* %mem) {
; CHECK-LABEL: store_i16_monotonic
; CHECK-NOT: sync
store atomic i16 42, i16* %mem monotonic, align 2
ret void
}
define void @store_i32_release(i32* %mem) {
; CHECK-LABEL: store_i32_release
; CHECK: sync 1
store atomic i32 42, i32* %mem release, align 4
ret void
}
define void @store_i64_seq_cst(i64* %mem) {
; CHECK-LABEL: store_i64_seq_cst
; CHECK: sync 0
store atomic i64 42, i64* %mem seq_cst, align 8
ret void
}
; Atomic CmpXchg
define i8 @cas_strong_i8_sc_sc(i8* %mem) {
; CHECK-LABEL: cas_strong_i8_sc_sc
; CHECK: sync 0
%val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst
; CHECK: sync 1
%loaded = extractvalue { i8, i1} %val, 0
ret i8 %loaded
}
define i16 @cas_weak_i16_acquire_acquire(i16* %mem) {
; CHECK-LABEL: cas_weak_i16_acquire_acquire
;CHECK-NOT: sync
%val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire
; CHECK: sync 1
%loaded = extractvalue { i16, i1} %val, 0
ret i16 %loaded
}
define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire
; CHECK: sync 1
%val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire
; CHECK: sync 1
%loaded = extractvalue { i32, i1} %val, 0
ret i32 %loaded
}
define i64 @cas_weak_i64_release_monotonic(i64* %mem) {
; CHECK-LABEL: cas_weak_i64_release_monotonic
; CHECK: sync 1
%val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic
; CHECK-NOT: [sync ]
%loaded = extractvalue { i64, i1} %val, 0
ret i64 %loaded
}
; AtomicRMW
define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
; CHECK-LABEL: add_i8_monotonic
; CHECK-NOT: sync
%val = atomicrmw add i8* %mem, i8 %operand monotonic
ret i8 %val
}
define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
; CHECK-LABEL: xor_i16_seq_cst
; CHECK: sync 0
%val = atomicrmw xor i16* %mem, i16 %operand seq_cst
; CHECK: sync 1
ret i16 %val
}
define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) {
; CHECK-LABEL: xchg_i32_acq_rel
; CHECK: sync 1
%val = atomicrmw xchg i32* %mem, i32 %operand acq_rel
; CHECK: sync 1
ret i32 %val
}
define i64 @and_i64_release(i64* %mem, i64 %operand) {
; CHECK-LABEL: and_i64_release
; CHECK: sync 1
%val = atomicrmw and i64* %mem, i64 %operand release
; CHECK-NOT: [sync ]
ret i64 %val
}