1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[Scalarizer] InsertElement handling w/ constant insert index

Summary: As can be clearly seen from the diff, this results in nicer IR.

Reviewers: jdoerfert, arsenm, bjope, cameron.mcinally

Reviewed By: jdoerfert

Subscribers: arphaman, wdng, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83102
This commit is contained in:
Roman Lebedev 2020-07-06 12:39:06 +03:00
parent 3c48d18cf9
commit bc4a979a8a
3 changed files with 33 additions and 16 deletions

View File

@ -192,6 +192,7 @@ public:
bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
bool visitCastInst(CastInst &CI);
bool visitBitCastInst(BitCastInst &BCI);
bool visitInsertElementInst(InsertElementInst &IEI);
bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
bool visitPHINode(PHINode &PHI);
bool visitLoadInst(LoadInst &LI);
@ -389,7 +390,7 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
if (!SV.empty()) {
for (unsigned I = 0, E = SV.size(); I != E; ++I) {
Value *V = SV[I];
if (V == nullptr)
if (V == nullptr || SV[I] == CV[I])
continue;
Instruction *Old = cast<Instruction>(V);
@ -740,6 +741,31 @@ bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
return true;
}
/// Scalarize an insertelement instruction whose insertion index is a
/// compile-time constant.
///
/// Builds one value per lane of the result: the lane selected by the constant
/// index takes the inserted scalar (operand 1), every other lane takes the
/// corresponding scattered component of the source vector (operand 0). The
/// per-lane values are then handed to gather().
///
/// \returns false (leaving \p IEI untouched) if the result is not a vector
/// type or the insertion index is not a ConstantInt; true otherwise.
bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  VectorType *VT = dyn_cast<VectorType>(IEI.getType());
  if (!VT)
    return false;

  // Only constant insertion indices are handled; bail out before doing any
  // other work for a variable index.
  auto *CI = dyn_cast<ConstantInt>(IEI.getOperand(2));
  if (!CI)
    return false;

  // getLimitedValue() saturates rather than asserting when the constant does
  // not fit in 64 bits. A saturated (or otherwise out-of-range) index matches
  // no lane below, so every lane is then taken from operand 0.
  const uint64_t InsertAt = CI->getLimitedValue();

  unsigned NumElems = VT->getNumElements();
  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0));
  Value *NewElt = IEI.getOperand(1);

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = (InsertAt == I) ? NewElt : Op0[I];

  gather(&IEI, Res);
  return true;
}
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
VectorType *VT = dyn_cast<VectorType>(SVI.getType());
if (!VT)

View File

@ -276,14 +276,14 @@ define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
; CHECK: store float* %val.i0, float** %dest.i0, align 32
; CHECK: store float* %val.i1, float** %dest.i1, align 8

View File

@ -12,18 +12,9 @@ define <4 x i32> @f1(<4 x i32> *%src, i32 %repl, i32 %index) {
; ALL-NEXT: [[VAL0_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
; ALL-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
; ALL-NEXT: [[VAL0_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
; ALL-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
; ALL-NEXT: [[VAL0_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
; ALL-NEXT: [[VAL0_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL0_I0]], i32 0
; ALL-NEXT: [[VAL0_UPTO1:%.*]] = insertelement <4 x i32> [[VAL0_UPTO0]], i32 [[VAL0_I1]], i32 1
; ALL-NEXT: [[VAL0_UPTO2:%.*]] = insertelement <4 x i32> [[VAL0_UPTO1]], i32 [[VAL0_I2]], i32 2
; ALL-NEXT: [[VAL0:%.*]] = insertelement <4 x i32> [[VAL0_UPTO2]], i32 [[VAL0_I3]], i32 3
; ALL-NEXT: [[VAL0_I01:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
; ALL-NEXT: [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I01]]
; ALL-NEXT: [[VAL0_I12:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
; ALL-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I12]]
; ALL-NEXT: [[VAL0_I23:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
; ALL-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I23]]
; ALL-NEXT: [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I0]]
; ALL-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I1]]
; ALL-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I2]]
; ALL-NEXT: [[VAL2_I3:%.*]] = shl i32 4, [[REPL:%.*]]
; ALL-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL2_I0]], i32 0
; ALL-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1