mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 02:52:53 +02:00
[SLP] No need to mark scatter load pointer as scalar as it gets vectorized.
The pointer operand of a scatter load does not remain scalar in the tree (it gets vectorized) and thus must not be marked as a scalar that remains scalar in vectorized form. Differential Revision: https://reviews.llvm.org/D96818
This commit is contained in:
parent
430a6166c8
commit
17ad018429
@ -2575,6 +2575,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
||||
// instructions. If that is the case, the one in Lane 0 will
|
||||
// be used.
|
||||
if (UseScalar != U ||
|
||||
UseEntry->State == TreeEntry::ScatterVectorize ||
|
||||
!InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
|
||||
<< ".\n");
|
||||
|
@ -243,20 +243,19 @@ define void @lookahead_external_uses(double* %A, double *%B, double *%C, double
|
||||
; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
|
||||
; CHECK-NEXT: [[B2:%.*]] = load double, double* [[IDXB2]], align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[TMP2]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double*> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A1]], i32 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[B2]], i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <2 x double> [[TMP12]], [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A1]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B2]], i32 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = fsub fast <2 x double> [[TMP7]], [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP11]], [[TMP10]]
|
||||
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
|
||||
; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8
|
||||
; CHECK-NEXT: store double [[A1]], double* [[EXT1:%.*]], align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -320,34 +319,36 @@ define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, do
|
||||
; CHECK-LABEL: @lookahead_limit_users_budget(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
|
||||
; CHECK-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
|
||||
; CHECK-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
|
||||
; CHECK-NEXT: [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
|
||||
; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
|
||||
; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double*> poison, double* [[B:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double*> [[TMP0]], double* [[B]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <2 x double*> [[TMP1]], <2 x i64> <i64 0, i64 2>
|
||||
; CHECK-NEXT: [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
|
||||
; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
|
||||
; CHECK-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8
|
||||
; CHECK-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8
|
||||
; CHECK-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8
|
||||
; CHECK-NEXT: [[D0:%.*]] = load double, double* [[IDXD0]], align 8
|
||||
; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8
|
||||
; CHECK-NEXT: [[B2:%.*]] = load double, double* [[IDXB2]], align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[TMP2]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
|
||||
; CHECK-NEXT: [[A2:%.*]] = load double, double* [[IDXA2]], align 8
|
||||
; CHECK-NEXT: [[B1:%.*]] = load double, double* [[IDXB1]], align 8
|
||||
; CHECK-NEXT: [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
|
||||
; CHECK-NEXT: [[SUBC0D0:%.*]] = fsub fast double [[C0]], [[D0]]
|
||||
; CHECK-NEXT: [[SUBA1B2:%.*]] = fsub fast double [[A1]], [[B2]]
|
||||
; CHECK-NEXT: [[SUBA2B1:%.*]] = fsub fast double [[A2]], [[B1]]
|
||||
; CHECK-NEXT: [[ADD0:%.*]] = fadd fast double [[SUBA0B0]], [[SUBC0D0]]
|
||||
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[SUBA1B2]], [[SUBA2B1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x double> [[TMP4]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A2]], i32 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[D0]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[B1]], i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP11]]
|
||||
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
|
||||
; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
|
||||
; CHECK-NEXT: store double [[ADD0]], double* [[IDXS0]], align 8
|
||||
; CHECK-NEXT: store double [[ADD1]], double* [[IDXS1]], align 8
|
||||
; CHECK-NEXT: store double [[A1]], double* [[EXT1:%.*]], align 8
|
||||
; CHECK-NEXT: store double [[A1]], double* [[EXT2:%.*]], align 8
|
||||
; CHECK-NEXT: store double [[A1]], double* [[EXT3:%.*]], align 8
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
|
||||
; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP13]], align 8
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: store double [[TMP14]], double* [[EXT1:%.*]], align 8
|
||||
; CHECK-NEXT: store double [[TMP14]], double* [[EXT2:%.*]], align 8
|
||||
; CHECK-NEXT: store double [[TMP14]], double* [[EXT3:%.*]], align 8
|
||||
; CHECK-NEXT: store double [[B1]], double* [[EXT4:%.*]], align 8
|
||||
; CHECK-NEXT: store double [[B1]], double* [[EXT5:%.*]], align 8
|
||||
; CHECK-NEXT: ret void
|
||||
|
Loading…
Reference in New Issue
Block a user