mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[Scalarizer] InsertElement handling w/ constant insert index
Summary: As can clearly be seen from the diff, this results in nicer IR. Reviewers: jdoerfert, arsenm, bjope, cameron.mcinally Reviewed By: jdoerfert Subscribers: arphaman, wdng, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D83102
This commit is contained in:
parent
3c48d18cf9
commit
bc4a979a8a
@ -192,6 +192,7 @@ public:
|
||||
bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
|
||||
bool visitCastInst(CastInst &CI);
|
||||
bool visitBitCastInst(BitCastInst &BCI);
|
||||
bool visitInsertElementInst(InsertElementInst &IEI);
|
||||
bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
|
||||
bool visitPHINode(PHINode &PHI);
|
||||
bool visitLoadInst(LoadInst &LI);
|
||||
@ -389,7 +390,7 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
|
||||
if (!SV.empty()) {
|
||||
for (unsigned I = 0, E = SV.size(); I != E; ++I) {
|
||||
Value *V = SV[I];
|
||||
if (V == nullptr)
|
||||
if (V == nullptr || SV[I] == CV[I])
|
||||
continue;
|
||||
|
||||
Instruction *Old = cast<Instruction>(V);
|
||||
@ -740,6 +741,31 @@ bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Scalarize an insertelement whose insert index is a compile-time constant.
///
/// The vector operand is scattered into per-lane values; the result is then
/// assembled lane by lane from those values, except that the lane selected by
/// the constant index takes the inserted scalar instead. The assembled lanes
/// are handed to gather() as the scalarized form of the instruction.
///
/// \param IEI the insertelement instruction being visited.
/// \returns true if the instruction was scalarized; false if its type is not
/// a VectorType or if the insert index is not a ConstantInt.
bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  VectorType *VT = dyn_cast<VectorType>(IEI.getType());
  if (!VT)
    return false;

  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0));
  Value *NewElt = IEI.getOperand(1);

  // Only constant insert indices are handled here.
  auto *CI = dyn_cast<ConstantInt>(IEI.getOperand(2));
  if (!CI)
    return false;

  const unsigned NumElems = VT->getNumElements();

  // Resolve the target lane once, outside the loop. getLimitedValue() clamps
  // to NumElems instead of asserting the way getZExtValue() does when the
  // index does not fit in 64 bits, so any out-of-range index simply matches
  // no lane and every lane is taken from the original vector.
  const uint64_t InsLane = CI->getValue().getLimitedValue(NumElems);

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned I = 0; I < NumElems; ++I)
    Res[I] = InsLane == I ? NewElt : Op0[I];

  gather(&IEI, Res);
  return true;
}
|
||||
|
||||
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
|
||||
VectorType *VT = dyn_cast<VectorType>(SVI.getType());
|
||||
if (!VT)
|
||||
|
@ -276,14 +276,14 @@ define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
|
||||
; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
|
||||
; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
|
||||
; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
|
||||
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
|
||||
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
|
||||
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
|
||||
; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
|
||||
; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
|
||||
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
|
||||
; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
|
||||
; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
|
||||
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
|
||||
; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
|
||||
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
|
||||
; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
|
||||
; CHECK: store float* %val.i0, float** %dest.i0, align 32
|
||||
; CHECK: store float* %val.i1, float** %dest.i1, align 8
|
||||
|
@ -12,18 +12,9 @@ define <4 x i32> @f1(<4 x i32> *%src, i32 %repl, i32 %index) {
|
||||
; ALL-NEXT: [[VAL0_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
|
||||
; ALL-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
|
||||
; ALL-NEXT: [[VAL0_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
|
||||
; ALL-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
|
||||
; ALL-NEXT: [[VAL0_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
|
||||
; ALL-NEXT: [[VAL0_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL0_I0]], i32 0
|
||||
; ALL-NEXT: [[VAL0_UPTO1:%.*]] = insertelement <4 x i32> [[VAL0_UPTO0]], i32 [[VAL0_I1]], i32 1
|
||||
; ALL-NEXT: [[VAL0_UPTO2:%.*]] = insertelement <4 x i32> [[VAL0_UPTO1]], i32 [[VAL0_I2]], i32 2
|
||||
; ALL-NEXT: [[VAL0:%.*]] = insertelement <4 x i32> [[VAL0_UPTO2]], i32 [[VAL0_I3]], i32 3
|
||||
; ALL-NEXT: [[VAL0_I01:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
|
||||
; ALL-NEXT: [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I01]]
|
||||
; ALL-NEXT: [[VAL0_I12:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
|
||||
; ALL-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I12]]
|
||||
; ALL-NEXT: [[VAL0_I23:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
|
||||
; ALL-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I23]]
|
||||
; ALL-NEXT: [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I0]]
|
||||
; ALL-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I1]]
|
||||
; ALL-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I2]]
|
||||
; ALL-NEXT: [[VAL2_I3:%.*]] = shl i32 4, [[REPL:%.*]]
|
||||
; ALL-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL2_I0]], i32 0
|
||||
; ALL-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1
|
||||
|
Loading…
Reference in New Issue
Block a user