1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[DSE] Extending isOverwrite to support offsetted fully overlapping stores

The isOverwrite function checks whether two stores fully overlap, and
ideally we would like to identify all instances of OW_Complete, as
they will yield possibly killable stores.
The current implementation is incapable of spotting instances where
the earlier store is offset relative to the later store, but
still fully overlapped. The limitation seems to lie in the
computation of the base pointers with the
GetPointerBaseWithConstantOffset API, which often yields different
base pointers even if the stores are guaranteed to partially overlap
(e.g. the alias analysis is returning AliasResult::PartialAlias).

The patch relies on the offsets computed and cached by BatchAAResults
(available after D93529) to determine if the offsetted overlapping
is OW_Complete.

Differential Revision: https://reviews.llvm.org/D97676
This commit is contained in:
Matteo Favaro 2021-03-10 21:07:54 +01:00 committed by Nikita Popov
parent af0d8fe721
commit 2d8f67490d
2 changed files with 126 additions and 5 deletions

View File

@ -390,20 +390,29 @@ isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
const uint64_t LaterSize = Later.Size.getValue();
const uint64_t EarlierSize = Earlier.Size.getValue();
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
// Query the alias information
AliasResult AAR = AA.alias(Later, Earlier);
// If the start pointers are the same, we just have to compare sizes to see if
// the later store was larger than the earlier store.
if (P1 == P2 || AA.isMustAlias(P1, P2)) {
if (AAR == AliasResult::MustAlias) {
// Make sure that the Later size is >= the Earlier size.
if (LaterSize >= EarlierSize)
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias) {
int64_t Off = AA.getClobberOffset(Later, Earlier).getValueOr(0);
if (Off > 0 && (uint64_t)Off + EarlierSize <= LaterSize)
return OW_Complete;
}
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
// If we can't resolve the same pointers to the same object, then we can't
@ -987,8 +996,8 @@ struct DSEState {
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
: F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
DL(F.getParent()->getDataLayout()) {}
: F(F), AA(AA), BatchAA(AA, /*CacheOffsets =*/true), MSSA(MSSA), DT(DT),
PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()) {}
static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,

View File

@ -0,0 +1,112 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -dse -S | FileCheck %s
@BUFFER = external local_unnamed_addr global [0 x i8], align 1
define void @ArrayTestFullyOverlapping(i64 %0) {
;
; The DSE pass will try to kill the store of size i32 using the store of
; size i64 because they fully overlap, in fact:
;
; - they use the same base pointer (in SCEV style '@BUFFER + %0')
; - the offset between the two stores is 32 bits
; - the size of the earlier store is 32 bits
; - the size of the later store is 64 bits
;
; CHECK-LABEL: @ArrayTestFullyOverlapping(
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0:%.*]], -8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
; CHECK-NEXT: store i64 0, i64* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
; Address of the 8-byte (i64) store: @BUFFER + %0 - 8.
%2 = add i64 %0, -8
%3 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %2
%4 = bitcast i8* %3 to i64*
; Address of the 4-byte (i32) store: @BUFFER + %0 - 4, i.e. offset +4
; inside the i64 store's [base-8, base) byte range.
%5 = add i64 %0, -4
%6 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %5
%7 = bitcast i8* %6 to i32*
; The earlier i32 store is fully covered by the later i64 store, so DSE
; removes it (note the CHECK lines above contain only the i64 store).
store i32 1, i32* %7
store i64 0, i64* %4
ret void
}
define void @VectorTestFullyOverlapping(float* %arg, i32 %i) {
;
; The earlier scalar float store writes 4 bytes at %arg + (%i + 1),
; i.e. bytes 4..7 of the 8-byte <2 x float> store at %arg + %i that
; follows it. The scalar store is therefore fully overwritten and DSE
; deletes it — only the vector store appears in the CHECK lines below.
;
; CHECK-LABEL: @VectorTestFullyOverlapping(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I2:%.*]] = zext i32 [[I:%.*]] to i64
; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds float, float* [[ARG:%.*]], i64 [[I2]]
; CHECK-NEXT: [[I4:%.*]] = bitcast float* [[I3]] to <2 x float>*
; CHECK-NEXT: store <2 x float> zeroinitializer, <2 x float>* [[I4]], align 16
; CHECK-NEXT: ret void
;
bb:
; Earlier store: one float at element index %i + 1.
%i7 = add nuw nsw i32 %i, 1
%i8 = zext i32 %i7 to i64
%i9 = getelementptr inbounds float, float* %arg, i64 %i8
store float 0.0, float* %i9, align 4
; Later store: two floats starting at element index %i, covering the
; earlier store's bytes entirely.
%i2 = zext i32 %i to i64
%i3 = getelementptr inbounds float, float* %arg, i64 %i2
%i4 = bitcast float* %i3 to <2 x float>*
store <2 x float> <float 0.0, float 0.0>, <2 x float>* %i4, align 16
ret void
}
define void @ArrayTestPartiallyOverlapping(i64 %0) {
;
; The DSE pass will not kill the store because the overlap is partial
; and won't fully clobber the i32 store.
;
; CHECK-LABEL: @ArrayTestPartiallyOverlapping(
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0:%.*]], 10
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 15
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
; CHECK-NEXT: store i32 1, i32* [[TMP7]], align 4
; CHECK-NEXT: store i64 0, i64* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
; The i64 store covers bytes [base+10, base+18); the i32 store covers
; bytes [base+15, base+19). Byte base+18 is written only by the i32
; store, so the overlap is partial and both stores must survive, as the
; CHECK lines above confirm.
%2 = add i64 %0, 10
%3 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %2
%4 = bitcast i8* %3 to i64*
%5 = add i64 %0, 15
%6 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %5
%7 = bitcast i8* %6 to i32*
store i32 1, i32* %7
store i64 0, i64* %4
ret void
}
define void @VectorTestPartiallyOverlapping(float* %arg, i32 %i) {
;
; The DSE pass will not kill the store because the overlap is partial
; and won't fully clobber the original store.
;
; CHECK-LABEL: @VectorTestPartiallyOverlapping(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I2:%.*]] = zext i32 [[I:%.*]] to i64
; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds float, float* [[ARG:%.*]], i64 [[I2]]
; CHECK-NEXT: [[I4:%.*]] = bitcast float* [[I3]] to <2 x float>*
; CHECK-NEXT: store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* [[I4]], align 16
; CHECK-NEXT: [[I5:%.*]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[I6:%.*]] = zext i32 [[I5]] to i64
; CHECK-NEXT: [[I7:%.*]] = getelementptr inbounds float, float* [[ARG]], i64 [[I6]]
; CHECK-NEXT: [[I8:%.*]] = bitcast float* [[I7]] to <2 x float>*
; CHECK-NEXT: store <2 x float> zeroinitializer, <2 x float>* [[I8]], align 16
; CHECK-NEXT: ret void
;
bb:
; Earlier store: two floats at element indices [%i, %i+2) — bytes 0..7
; relative to %arg + %i.
%i2 = zext i32 %i to i64
%i3 = getelementptr inbounds float, float* %arg, i64 %i2
%i4 = bitcast float* %i3 to <2 x float>*
store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* %i4, align 16
; Later store: two floats at [%i+1, %i+3) — bytes 4..11. Bytes 0..3 of
; the earlier store escape it, so neither store can be eliminated and
; both appear in the CHECK lines above.
%i5 = add nuw nsw i32 %i, 1
%i6 = zext i32 %i5 to i64
%i7 = getelementptr inbounds float, float* %arg, i64 %i6
%i8 = bitcast float* %i7 to <2 x float>*
store <2 x float> <float 0.0, float 0.0>, <2 x float>* %i8, align 16
ret void
}