[InstCombine] Extend peephole DSE to handle unordered atomics
This extends the same line of reasoning used in EarlyCSE w/ http://reviews.llvm.org/D15352 to the DSE implementation in InstCombine.

Key points:
* We only remove unordered or simple stores.
* The loads producing values consumed by dead stores don't influence whether the store is dead.

Differential Revision: http://reviews.llvm.org/D15354

llvm-svn: 255932
commit 46cd55f309
parent 4730f05bb8
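The whole patch turns on the gap between StoreInst::isSimple() and StoreInst::isUnordered(). As a rough sketch of the assumed semantics (paraphrased from LLVM's Instructions.h; the free functions storeIsSimple/storeIsUnordered are ours for illustration, not LLVM API):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Assumed semantics: a "simple" access is neither atomic nor volatile; an
// "unordered" access is non-volatile with ordering no stronger than
// Unordered, so it additionally admits unordered atomic stores.
bool storeIsSimple(const StoreInst &SI) {
  return !SI.isAtomic() && !SI.isVolatile();
}
bool storeIsUnordered(const StoreInst &SI) {
  return (SI.getOrdering() == AtomicOrdering::NotAtomic ||
          SI.getOrdering() == AtomicOrdering::Unordered) &&
         !SI.isVolatile();
}

Every simple store is unordered, so widening the guards below from isSimple() to isUnordered() strictly grows the set of stores the peephole may delete.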
@@ -1038,9 +1038,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     return &SI;
   }

-  // Don't hack volatile/atomic stores.
-  // FIXME: Some bits are legal for atomic stores; needs refactoring.
-  if (!SI.isSimple()) return nullptr;
+  // Don't hack volatile/ordered stores.
+  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+  if (!SI.isUnordered()) return nullptr;

   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
@@ -1072,7 +1072,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {

     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
       // Prev store isn't volatile, and stores to the same location?
-      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+      if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
                                                         SI.getOperand(1))) {
         ++NumDeadStore;
         ++BBI;
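For orientation, the condition above sits inside a backward scan over the block. A condensed, hypothetical standalone form of that scan (the real loop in visitStoreInst also bounds how many instructions it inspects and handles further cases) might look like:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Walk backwards from SI, skipping debug intrinsics. If the nearest real
// predecessor is an unordered store to the same address, that earlier store
// is fully overwritten before anything can observe it, and is therefore dead.
static StoreInst *findOverwrittenPrevStore(StoreInst &SI) {
  BasicBlock::iterator BBI(SI);
  while (BBI != SI.getParent()->begin()) {
    Instruction *Prev = &*--BBI;
    if (isa<DbgInfoIntrinsic>(Prev))
      continue; // debug intrinsics neither read nor write memory
    if (auto *PrevSI = dyn_cast<StoreInst>(Prev))
      if (PrevSI->isUnordered() &&
          PrevSI->getPointerOperand() == SI.getPointerOperand())
        return PrevSI; // dead: overwritten with no intervening observer
    return nullptr; // any other instruction might observe the earlier store
  }
  return nullptr;
}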
@@ -1086,9 +1086,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     // the pointer we're loading and is producing the pointer we're storing,
     // then *this* store is dead (X = load P; store X -> P).
     if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
-          LI->isSimple())
+      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
+        assert(SI.isUnordered() && "can't eliminate ordering operation");
         return EraseInstFromFunction(SI);
+      }

       // Otherwise, this is a load from some other location. Stores before it
       // may not be dead.
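This hunk is the second key point in action: the load's ordering check is dropped entirely, because only the store's ordering decides whether the store may go away. A hypothetical condensed form of the check (simplified: the real code matches addresses via equivalentAddressValues rather than pointer identity, and only fires when the load directly precedes the store, so nothing can write the location in between):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// "X = load P; store X -> P" leaves memory unchanged, so the store is dead
// whenever the *store* itself is unordered. The load's atomicity, ordering,
// or volatility are irrelevant to the store's deadness; the load stays
// behind, so its own side effects are preserved.
static bool isDeadWriteBack(StoreInst &SI) {
  auto *LI = dyn_cast<LoadInst>(SI.getValueOperand());
  return LI && SI.isUnordered() &&
         LI->getPointerOperand() == SI.getPointerOperand();
}

The write_back tests added below exercise exactly this: write_back6 and write_back7 keep a seq_cst or volatile load while still deleting the unordered store fed by it.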
@@ -1114,6 +1115,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   if (isa<UndefValue>(Val))
     return EraseInstFromFunction(SI);

+  // The code below needs to be audited and adjusted for unordered atomics
+  if (!SI.isSimple())
+    return nullptr;
+
   // If this store is the last instruction in the basic block (possibly
   // excepting debug info instructions), and if the block ends with an
   // unconditional branch, try to move it to the successor block.
@@ -113,6 +113,119 @@ for.end:                                          ; preds = %for.cond
 ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
 }

+define void @dse1(i32* %p) {
+; CHECK-LABEL: dse1
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 0, i32* %p
+  store i32 0, i32* %p
+  ret void
+}
+
+; Slightly subtle: if we're mixing atomic and non-atomic access to the
+; same location, then the contents of the location are undefined if there's
+; an actual race. As such, we're free to pick either store under the
+; assumption that we're not racing with any other thread.
+define void @dse2(i32* %p) {
+; CHECK-LABEL: dse2
+; CHECK-NEXT: store i32 0, i32* %p
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store i32 0, i32* %p
+  ret void
+}
+
+define void @dse3(i32* %p) {
+; CHECK-LABEL: dse3
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+  store i32 0, i32* %p
+  store atomic i32 0, i32* %p unordered, align 4
+  ret void
+}
+
+define void @dse4(i32* %p) {
+; CHECK-LABEL: dse4
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store atomic i32 0, i32* %p unordered, align 4
+  ret void
+}
+
+; Implementation limit - could remove unordered store here, but
+; currently don't.
+define void @dse5(i32* %p) {
+; CHECK-LABEL: dse5
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store atomic i32 0, i32* %p seq_cst, align 4
+  ret void
+}
+
+define void @write_back1(i32* %p) {
+; CHECK-LABEL: write_back1
+; CHECK-NEXT: ret
+  %v = load i32, i32* %p
+  store i32 %v, i32* %p
+  ret void
+}
+
+define void @write_back2(i32* %p) {
+; CHECK-LABEL: write_back2
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store i32 %v, i32* %p
+  ret void
+}
+
+define void @write_back3(i32* %p) {
+; CHECK-LABEL: write_back3
+; CHECK-NEXT: ret
+  %v = load i32, i32* %p
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
+define void @write_back4(i32* %p) {
+; CHECK-LABEL: write_back4
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
+; Can't remove store due to ordering side effect
+define void @write_back5(i32* %p) {
+; CHECK-LABEL: write_back5
+; CHECK-NEXT: load
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store atomic i32 %v, i32* %p seq_cst, align 4
+  ret void
+}
+
+define void @write_back6(i32* %p) {
+; CHECK-LABEL: write_back6
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p seq_cst, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
+define void @write_back7(i32* %p) {
+; CHECK-LABEL: write_back7
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+  %v = load atomic volatile i32, i32* %p seq_cst, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
 !0 = !{!4, !4, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}
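For reference, the CHECK and CHECK-NEXT lines above are FileCheck patterns that pin down exactly which instruction survives InstCombine. The test's RUN header lies outside this hunk; InstCombine tests of this vintage are conventionally driven by a line along these lines (an assumption, since the header is not shown in the diff):

; RUN: opt < %s -instcombine -S | FileCheck %s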