mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[LAA] Make getPointersDiff() API compatible with opaque pointers
Make getPointersDiff() and sortPtrAccesses() compatible with opaque pointers by explicitly passing in the element type instead of deriving it from the pointer's pointee type. With opaque pointers the SLPVectorizer output is currently slightly suboptimal, because unnecessary pointer bitcasts are still emitted. Differential Revision: https://reviews.llvm.org/D104784
This commit is contained in:
parent
5a6e96d2d0
commit
94c11807a4
@ -684,7 +684,8 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
|
||||
/// is a simple API that does not depend on the analysis pass.
|
||||
/// \param StrictCheck Ensure that the calculated distance matches the
|
||||
/// type-based one after all the bitcasts removal in the provided pointers.
|
||||
Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
|
||||
Optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
|
||||
Value *PtrB, const DataLayout &DL,
|
||||
ScalarEvolution &SE, bool StrictCheck = false,
|
||||
bool CheckType = true);
|
||||
|
||||
@ -698,7 +699,7 @@ Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
|
||||
/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
|
||||
/// saves the mask for actual memory accesses in program order in
|
||||
/// \p SortedIndices as <1,2,0,3>
|
||||
bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
|
||||
bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
|
||||
ScalarEvolution &SE,
|
||||
SmallVectorImpl<unsigned> &SortedIndices);
|
||||
|
||||
|
@ -1124,16 +1124,22 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
|
||||
return Stride;
|
||||
}
|
||||
|
||||
Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
|
||||
const DataLayout &DL, ScalarEvolution &SE,
|
||||
bool StrictCheck, bool CheckType) {
|
||||
Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
|
||||
Value *PtrB, const DataLayout &DL,
|
||||
ScalarEvolution &SE, bool StrictCheck,
|
||||
bool CheckType) {
|
||||
assert(PtrA && PtrB && "Expected non-nullptr pointers.");
|
||||
assert(cast<PointerType>(PtrA->getType())
|
||||
->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
|
||||
assert(cast<PointerType>(PtrB->getType())
|
||||
->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");
|
||||
|
||||
// Identical pointers are trivially at distance zero from each other.
|
||||
if (PtrA == PtrB)
|
||||
return 0;
|
||||
|
||||
// Make sure that PtrA and PtrB have the same type if required
|
||||
if (CheckType && PtrA->getType() != PtrB->getType())
|
||||
// Make sure that the element types are the same if required.
|
||||
if (CheckType && ElemTyA != ElemTyB)
|
||||
return None;
|
||||
|
||||
unsigned ASA = PtrA->getType()->getPointerAddressSpace();
|
||||
@ -1174,8 +1180,7 @@ Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
|
||||
return None;
|
||||
Val = Diff->getAPInt().getSExtValue();
|
||||
}
|
||||
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
|
||||
int Size = DL.getTypeStoreSize(Ty);
|
||||
int Size = DL.getTypeStoreSize(ElemTyA);
|
||||
int Dist = Val / Size;
|
||||
|
||||
// Ensure that the calculated distance matches the type-based one after all
|
||||
@ -1185,8 +1190,8 @@ Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
|
||||
return None;
|
||||
}
|
||||
|
||||
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
|
||||
ScalarEvolution &SE,
|
||||
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
|
||||
const DataLayout &DL, ScalarEvolution &SE,
|
||||
SmallVectorImpl<unsigned> &SortedIndices) {
|
||||
assert(llvm::all_of(
|
||||
VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
|
||||
@ -1204,8 +1209,8 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
|
||||
int Cnt = 1;
|
||||
bool IsConsecutive = true;
|
||||
for (auto *Ptr : VL.drop_front()) {
|
||||
Optional<int> Diff =
|
||||
getPointersDiff(Ptr0, Ptr, DL, SE, /*StrictCheck=*/true);
|
||||
Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
|
||||
/*StrictCheck=*/true);
|
||||
if (!Diff)
|
||||
return false;
|
||||
|
||||
@ -1238,8 +1243,10 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
|
||||
Value *PtrB = getLoadStorePointerOperand(B);
|
||||
if (!PtrA || !PtrB)
|
||||
return false;
|
||||
Optional<int> Diff =
|
||||
getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
|
||||
Type *ElemTyA = getLoadStoreType(A);
|
||||
Type *ElemTyB = getLoadStoreType(B);
|
||||
Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
|
||||
/*StrictCheck=*/true, CheckType);
|
||||
return Diff && *Diff == 1;
|
||||
}
|
||||
|
||||
|
@ -987,9 +987,9 @@ public:
|
||||
if (LI1->getParent() != LI2->getParent())
|
||||
return VLOperands::ScoreFail;
|
||||
|
||||
Optional<int> Dist =
|
||||
getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),
|
||||
DL, SE, /*StrictCheck=*/true);
|
||||
Optional<int> Dist = getPointersDiff(
|
||||
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
|
||||
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
|
||||
return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
|
||||
: VLOperands::ScoreFail;
|
||||
}
|
||||
@ -2968,7 +2968,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
|
||||
OrdersType CurrentOrder;
|
||||
// Check the order of pointer operands.
|
||||
if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
|
||||
if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
|
||||
Value *Ptr0;
|
||||
Value *PtrN;
|
||||
if (CurrentOrder.empty()) {
|
||||
@ -2978,7 +2978,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
Ptr0 = PointerOps[CurrentOrder.front()];
|
||||
PtrN = PointerOps[CurrentOrder.back()];
|
||||
}
|
||||
Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);
|
||||
Optional<int> Diff = getPointersDiff(
|
||||
ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
|
||||
// Check that the sorted loads are consecutive.
|
||||
if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
|
||||
if (CurrentOrder.empty()) {
|
||||
@ -3243,7 +3244,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
|
||||
OrdersType CurrentOrder;
|
||||
// Check the order of pointer operands.
|
||||
if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
|
||||
if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
|
||||
Value *Ptr0;
|
||||
Value *PtrN;
|
||||
if (CurrentOrder.empty()) {
|
||||
@ -3253,7 +3254,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
Ptr0 = PointerOps[CurrentOrder.front()];
|
||||
PtrN = PointerOps[CurrentOrder.back()];
|
||||
}
|
||||
Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);
|
||||
Optional<int> Dist =
|
||||
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
|
||||
// Check that the sorted pointer operands are consecutive.
|
||||
if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
|
||||
if (CurrentOrder.empty()) {
|
||||
@ -6893,9 +6895,10 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
|
||||
++IterCnt;
|
||||
CheckedPairs[Idx].set(K);
|
||||
CheckedPairs[K].set(Idx);
|
||||
Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),
|
||||
Stores[Idx]->getPointerOperand(), *DL,
|
||||
*SE, /*StrictCheck=*/true);
|
||||
Optional<int> Diff = getPointersDiff(
|
||||
Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
|
||||
Stores[Idx]->getValueOperand()->getType(),
|
||||
Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
|
||||
if (!Diff || *Diff == 0)
|
||||
return false;
|
||||
int Val = *Diff;
|
||||
|
59
test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
Normal file
59
test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
Normal file
@ -0,0 +1,59 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -slp-vectorizer -mtriple=x86_64-apple-macosx -mcpu=haswell < %s | FileCheck %s
|
||||
|
||||
define void @test(ptr %r, ptr %p, ptr %q) #0 {
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 0
|
||||
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
|
||||
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
|
||||
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
|
||||
; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 0
|
||||
; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 1
|
||||
; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2
|
||||
; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[P0]] to <4 x i64>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[Q0]] to <4 x i64>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <4 x i64> [[TMP2]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
|
||||
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
|
||||
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%p0 = getelementptr inbounds i64, ptr %p, i64 0
|
||||
%p1 = getelementptr inbounds i64, ptr %p, i64 1
|
||||
%p2 = getelementptr inbounds i64, ptr %p, i64 2
|
||||
%p3 = getelementptr inbounds i64, ptr %p, i64 3
|
||||
|
||||
%q0 = getelementptr inbounds i64, ptr %q, i64 0
|
||||
%q1 = getelementptr inbounds i64, ptr %q, i64 1
|
||||
%q2 = getelementptr inbounds i64, ptr %q, i64 2
|
||||
%q3 = getelementptr inbounds i64, ptr %q, i64 3
|
||||
|
||||
%x0 = load i64, ptr %p0, align 2
|
||||
%x1 = load i64, ptr %p1, align 2
|
||||
%x2 = load i64, ptr %p2, align 2
|
||||
%x3 = load i64, ptr %p3, align 2
|
||||
|
||||
%y0 = load i64, ptr %q0, align 2
|
||||
%y1 = load i64, ptr %q1, align 2
|
||||
%y2 = load i64, ptr %q2, align 2
|
||||
%y3 = load i64, ptr %q3, align 2
|
||||
|
||||
%sub0 = sub nsw i64 %x0, %y0
|
||||
%sub1 = sub nsw i64 %x1, %y1
|
||||
%sub2 = sub nsw i64 %x2, %y2
|
||||
%sub3 = sub nsw i64 %x3, %y3
|
||||
|
||||
%g0 = getelementptr inbounds i32, ptr %r, i64 %sub0
|
||||
%g1 = getelementptr inbounds i32, ptr %r, i64 %sub1
|
||||
%g2 = getelementptr inbounds i32, ptr %r, i64 %sub2
|
||||
%g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user