[DSE] Extending isOverwrite to support offsetted fully overlapping stores
The isOverwrite function is responsible for identifying stores that fully overlap: ideally we would like to catch every OW_Complete instance, since each one may yield a killable store. The current implementation cannot spot cases where the earlier store is offset relative to the later store yet still fully covered by it. The limitation lies in the base-pointer computation via the GetPointerBaseWithConstantOffset API, which often yields different base pointers even when the stores are guaranteed to overlap (e.g. alias analysis returns AliasResult::PartialAlias).

This patch instead relies on the offsets computed and cached by BatchAAResults (available after D93529) to decide whether such an offset overlap is in fact OW_Complete.

Differential Revision: https://reviews.llvm.org/D97676
parent af0d8fe721
commit 2d8f67490d
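For illustration, the pattern the patch targets looks like the self-contained C++ sketch below. The buffer, offsets, sizes, and the function name are invented for this example and are not taken from the patch; the point is that the narrow store starts at a different address than the wide store that follows, yet lies entirely inside its footprint, so it is dead.

#include <cstdint>
#include <cstring>

// Illustrative only: an "offsetted fully overlapping" store pair.
void offsettedFullOverlap(char *Buf) {
  uint32_t Small = 1;
  uint64_t Big = 0;
  // Earlier store: 4 bytes covering [Buf+4, Buf+8).
  std::memcpy(Buf + 4, &Small, sizeof(Small));
  // Later store: 8 bytes covering [Buf, Buf+8). It completely overwrites the
  // earlier store (offset 4 > 0 and 4 + 4 <= 8), so DSE may delete the memcpy
  // above once isOverwrite reports OW_Complete.
  std::memcpy(Buf, &Big, sizeof(Big));
}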
@@ -390,20 +390,29 @@ isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
   const uint64_t LaterSize = Later.Size.getValue();
   const uint64_t EarlierSize = Earlier.Size.getValue();
 
-  const Value *P1 = Earlier.Ptr->stripPointerCasts();
-  const Value *P2 = Later.Ptr->stripPointerCasts();
+  // Query the alias information
+  AliasResult AAR = AA.alias(Later, Earlier);
 
   // If the start pointers are the same, we just have to compare sizes to see if
   // the later store was larger than the earlier store.
-  if (P1 == P2 || AA.isMustAlias(P1, P2)) {
+  if (AAR == AliasResult::MustAlias) {
     // Make sure that the Later size is >= the Earlier size.
     if (LaterSize >= EarlierSize)
       return OW_Complete;
   }
 
+  // If we hit a partial alias we may have a full overwrite
+  if (AAR == AliasResult::PartialAlias) {
+    int64_t Off = AA.getClobberOffset(Later, Earlier).getValueOr(0);
+    if (Off > 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+      return OW_Complete;
+  }
+
   // Check to see if the later store is to the entire object (either a global,
   // an alloca, or a byval/inalloca argument). If so, then it clearly
   // overwrites any other store to the same object.
+  const Value *P1 = Earlier.Ptr->stripPointerCasts();
+  const Value *P2 = Later.Ptr->stripPointerCasts();
   const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
 
   // If we can't resolve the same pointers to the same object, then we can't
@@ -987,8 +996,8 @@ struct DSEState {
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
-      : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
-        DL(F.getParent()->getDataLayout()) {}
+      : F(F), AA(AA), BatchAA(AA, /*CacheOffsets =*/true), MSSA(MSSA), DT(DT),
+        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()) {}
 
   static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                       DominatorTree &DT, PostDominatorTree &PDT,
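As a usage note, the two hunks above fit together roughly as in the following sketch. It only reuses calls visible in the hunks (the two-argument BatchAAResults constructor, alias, getClobberOffset); the free function and its name are invented here, and this is not code from the pass itself.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"

using namespace llvm;

// Hypothetical helper, mirroring the isOverwrite logic in the first hunk.
static bool earlierIsFullyClobbered(AAResults &AA, const MemoryLocation &Later,
                                    const MemoryLocation &Earlier) {
  // Offset caching makes partial-alias queries record the relative offset of
  // the two locations, which getClobberOffset can then hand back.
  BatchAAResults BatchAA(AA, /*CacheOffsets=*/true);
  if (BatchAA.alias(Later, Earlier) != AliasResult::PartialAlias)
    return false;
  // Offset of the earlier access measured from the start of the later one.
  int64_t Off = BatchAA.getClobberOffset(Later, Earlier).getValueOr(0);
  const uint64_t LaterSize = Later.Size.getValue();
  const uint64_t EarlierSize = Earlier.Size.getValue();
  return Off > 0 && (uint64_t)Off + EarlierSize <= LaterSize;
}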
@@ -0,0 +1,112 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -dse -S | FileCheck %s

@BUFFER = external local_unnamed_addr global [0 x i8], align 1

define void @ArrayTestFullyOverlapping(i64 %0) {
;
; The DSE pass will try to kill the store of size i32 using the store of
; size i64 because they fully overlap, in fact:
;
; - they use the same base pointer (in SCEV style '@BUFFER + %0')
; - the offset between the two stores is 32 bits
; - the size of the earlier store is 32 bits
; - the size of the later store is 64 bits
;
; CHECK-LABEL: @ArrayTestFullyOverlapping(
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], -8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
; CHECK-NEXT:    store i64 0, i64* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
  %2 = add i64 %0, -8
  %3 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %2
  %4 = bitcast i8* %3 to i64*
  %5 = add i64 %0, -4
  %6 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %5
  %7 = bitcast i8* %6 to i32*
  store i32 1, i32* %7
  store i64 0, i64* %4
  ret void
}

define void @VectorTestFullyOverlapping(float* %arg, i32 %i) {
; CHECK-LABEL: @VectorTestFullyOverlapping(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I:%.*]] to i64
; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, float* [[ARG:%.*]], i64 [[I2]]
; CHECK-NEXT:    [[I4:%.*]] = bitcast float* [[I3]] to <2 x float>*
; CHECK-NEXT:    store <2 x float> zeroinitializer, <2 x float>* [[I4]], align 16
; CHECK-NEXT:    ret void
;
bb:
  %i7 = add nuw nsw i32 %i, 1
  %i8 = zext i32 %i7 to i64
  %i9 = getelementptr inbounds float, float* %arg, i64 %i8
  store float 0.0, float* %i9, align 4
  %i2 = zext i32 %i to i64
  %i3 = getelementptr inbounds float, float* %arg, i64 %i2
  %i4 = bitcast float* %i3 to <2 x float>*
  store <2 x float> <float 0.0, float 0.0>, <2 x float>* %i4, align 16
  ret void
}

define void @ArrayTestPartiallyOverlapping(i64 %0) {
;
; The DSE pass will not kill the store because the overlap is partial
; and won't fully clobber the i32 store.
;
; CHECK-LABEL: @ArrayTestPartiallyOverlapping(
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], 10
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP0]], 15
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
; CHECK-NEXT:    store i32 1, i32* [[TMP7]], align 4
; CHECK-NEXT:    store i64 0, i64* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
  %2 = add i64 %0, 10
  %3 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %2
  %4 = bitcast i8* %3 to i64*
  %5 = add i64 %0, 15
  %6 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %5
  %7 = bitcast i8* %6 to i32*
  store i32 1, i32* %7
  store i64 0, i64* %4
  ret void
}

define void @VectorTestPartiallyOverlapping(float* %arg, i32 %i) {
;
; The DSE pass will not kill the store because the overlap is partial
; and won't fully clobber the original store.
;
; CHECK-LABEL: @VectorTestPartiallyOverlapping(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I:%.*]] to i64
; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, float* [[ARG:%.*]], i64 [[I2]]
; CHECK-NEXT:    [[I4:%.*]] = bitcast float* [[I3]] to <2 x float>*
; CHECK-NEXT:    store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* [[I4]], align 16
; CHECK-NEXT:    [[I5:%.*]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT:    [[I6:%.*]] = zext i32 [[I5]] to i64
; CHECK-NEXT:    [[I7:%.*]] = getelementptr inbounds float, float* [[ARG]], i64 [[I6]]
; CHECK-NEXT:    [[I8:%.*]] = bitcast float* [[I7]] to <2 x float>*
; CHECK-NEXT:    store <2 x float> zeroinitializer, <2 x float>* [[I8]], align 16
; CHECK-NEXT:    ret void
;
bb:
  %i2 = zext i32 %i to i64
  %i3 = getelementptr inbounds float, float* %arg, i64 %i2
  %i4 = bitcast float* %i3 to <2 x float>*
  store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* %i4, align 16
  %i5 = add nuw nsw i32 %i, 1
  %i6 = zext i32 %i5 to i64
  %i7 = getelementptr inbounds float, float* %arg, i64 %i6
  %i8 = bitcast float* %i7 to <2 x float>*
  store <2 x float> <float 0.0, float 0.0>, <2 x float>* %i8, align 16
  ret void
}
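Running the numbers from the two array tests above through the containment check added in the first hunk gives the expected answers. This is a standalone sketch; the helper name is invented here and is not part of the pass.

#include <cassert>
#include <cstdint>

// Same containment test as the PartialAlias branch in the first hunk: the
// earlier store starts Off bytes into the later store and must end within it
// to be completely overwritten.
static bool fullyOverwritten(int64_t Off, uint64_t EarlierSize,
                             uint64_t LaterSize) {
  return Off > 0 && (uint64_t)Off + EarlierSize <= LaterSize;
}

int main() {
  // ArrayTestFullyOverlapping: i32 at @BUFFER+%0-4 vs. i64 at @BUFFER+%0-8.
  // The i32 starts 4 bytes into the i64 and ends exactly at its end.
  assert(fullyOverwritten(/*Off=*/4, /*EarlierSize=*/4, /*LaterSize=*/8));

  // ArrayTestPartiallyOverlapping: i32 at @BUFFER+%0+15 vs. i64 at @BUFFER+%0+10.
  // The i32 starts 5 bytes in and spills 1 byte past the i64, so it survives.
  assert(!fullyOverwritten(/*Off=*/5, /*EarlierSize=*/4, /*LaterSize=*/8));
  return 0;
}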