1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[SLP]No need to mark scatter load pointer as scalar as it gets vectorized.

The pointer operand of a scatter load does not remain scalar in the tree (it
gets vectorized) and thus must not be marked as a value that stays
scalar in the vectorized form.

Differential Revision: https://reviews.llvm.org/D96818
This commit is contained in:
Alexey Bataev 2021-02-16 14:31:18 -08:00
parent 430a6166c8
commit 17ad018429
2 changed files with 31 additions and 29 deletions

View File

@@ -2575,6 +2575,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
// instructions. If that is the case, the one in Lane 0 will
// be used.
if (UseScalar != U ||
UseEntry->State == TreeEntry::ScatterVectorize ||
!InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");

View File

@@ -243,20 +243,19 @@ define void @lookahead_external_uses(double* %A, double *%B, double *%C, double
; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; CHECK-NEXT: [[B2:%.*]] = load double, double* [[IDXB2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[TMP2]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double*> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A1]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[B2]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP6]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <2 x double> [[TMP12]], [[TMP11]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B2]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = fsub fast <2 x double> [[TMP7]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP5]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8
; CHECK-NEXT: store double [[A1]], double* [[EXT1:%.*]], align 8
; CHECK-NEXT: ret void
;
@@ -320,34 +319,36 @@ define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, do
; CHECK-LABEL: @lookahead_limit_users_budget(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
; CHECK-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
; CHECK-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
; CHECK-NEXT: [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double*> poison, double* [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double*> [[TMP0]], double* [[B]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <2 x double*> [[TMP1]], <2 x i64> <i64 0, i64 2>
; CHECK-NEXT: [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
; CHECK-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; CHECK-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; CHECK-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; CHECK-NEXT: [[D0:%.*]] = load double, double* [[IDXD0]], align 8
; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; CHECK-NEXT: [[B2:%.*]] = load double, double* [[IDXB2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[TMP2]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
; CHECK-NEXT: [[A2:%.*]] = load double, double* [[IDXA2]], align 8
; CHECK-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; CHECK-NEXT: [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
; CHECK-NEXT: [[SUBC0D0:%.*]] = fsub fast double [[C0]], [[D0]]
; CHECK-NEXT: [[SUBA1B2:%.*]] = fsub fast double [[A1]], [[B2]]
; CHECK-NEXT: [[SUBA2B1:%.*]] = fsub fast double [[A2]], [[B1]]
; CHECK-NEXT: [[ADD0:%.*]] = fadd fast double [[SUBA0B0]], [[SUBC0D0]]
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[SUBA1B2]], [[SUBA2B1]]
; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x double> [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A2]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[B1]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP11]]
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
; CHECK-NEXT: store double [[ADD0]], double* [[IDXS0]], align 8
; CHECK-NEXT: store double [[ADD1]], double* [[IDXS1]], align 8
; CHECK-NEXT: store double [[A1]], double* [[EXT1:%.*]], align 8
; CHECK-NEXT: store double [[A1]], double* [[EXT2:%.*]], align 8
; CHECK-NEXT: store double [[A1]], double* [[EXT3:%.*]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
; CHECK-NEXT: store double [[TMP14]], double* [[EXT1:%.*]], align 8
; CHECK-NEXT: store double [[TMP14]], double* [[EXT2:%.*]], align 8
; CHECK-NEXT: store double [[TMP14]], double* [[EXT3:%.*]], align 8
; CHECK-NEXT: store double [[B1]], double* [[EXT4:%.*]], align 8
; CHECK-NEXT: store double [[B1]], double* [[EXT5:%.*]], align 8
; CHECK-NEXT: ret void