1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[SLPVectorizer] Fix crash in vectorizeChainsInBlock for scalable vector.

The function vectorizeChainsInBlock does not support scalable vector,
because function like canReuseExtract and isCommutative in the code
path assert with scalable vectors.

This patch avoids vectorizing blocks that have extract instructions with scalable
vector..

Differential Revision: https://reviews.llvm.org/D104809
This commit is contained in:
Caroline Concatto 2021-06-21 15:22:58 +01:00
parent e338c9199a
commit eae00d0f38
2 changed files with 106 additions and 1 deletions

View File

@ -2675,6 +2675,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
// Don't handle scalable vectors
if (S.getOpcode() == Instruction::ExtractElement &&
isa<ScalableVectorType>(
cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
// Don't handle vectors.
if (S.OpValue->getType()->isVectorTy() &&
!isa<InsertElementInst>(S.OpValue)) {
@ -3739,7 +3748,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
0);
}
if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) &&
allSameBlock(VL)) {
allSameBlock(VL) &&
!isa<ScalableVectorType>(
cast<ExtractElementInst>(E->getMainOp())->getVectorOperandType())) {
// Check that gather of extractelements can be represented as just a
// shuffle of a single/two vectors the scalars are extracted from.
SmallVector<int> Mask;

View File

@ -44,5 +44,99 @@ end:
ret <vscale x 4 x i32> %retval
}
define void @scalable_phi1() {
; CHECK-LABEL: @scalable_phi1(
; CHECK-NEXT: middle.block:
; CHECK-NEXT: [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
; CHECK-NEXT: [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
; CHECK-NEXT: br label [[FOR_BODY_I:%.*]]
; CHECK: for.body.i:
; CHECK-NEXT: [[RECUR1:%.*]] = phi i16 [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ], [ undef, [[FOR_BODY_I]] ]
; CHECK-NEXT: [[RECUR2:%.*]] = phi i16 [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY_I]] ]
; CHECK-NEXT: br label [[FOR_BODY_I]]
;
middle.block:
%extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
%extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
br label %for.body.i
for.body.i: ; preds = %for.body.i, %middle.block
%recur1 = phi i16 [ %extract1, %middle.block ], [ undef, %for.body.i ]
%recur2 = phi i16 [ %extract2, %middle.block ], [ undef, %for.body.i ]
br label %for.body.i
}
define void @scalable_phi2() {
; CHECK-LABEL: @scalable_phi2(
; CHECK-NEXT: middle.block:
; CHECK-NEXT: [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
; CHECK-NEXT: [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
; CHECK-NEXT: br label [[FOR_BODY_I:%.*]]
; CHECK: for.body.i:
; CHECK-NEXT: [[RECUR1:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ]
; CHECK-NEXT: [[RECUR2:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY_I]]
;
middle.block:
%extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
%extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
br label %for.body.i
for.body.i: ; preds = %for.body.i, %middle.block
%recur1 = phi i16 [ undef, %for.body.i ], [ %extract1, %middle.block ]
%recur2 = phi i16 [ undef, %for.body.i ], [ %extract2, %middle.block ]
br label %for.body.i
}
define <vscale x 4 x i32> @build_vec_v4i32_reuse_0(<vscale x 2 x i32> %v0) {
; CHECK-LABEL: @build_vec_v4i32_reuse_0(
; CHECK-NEXT: [[V0_0:%.*]] = extractelement <vscale x 2 x i32> [[V0:%.*]], i32 0
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <vscale x 2 x i32> [[V0]], i32 1
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V0_0]]
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V0_1]]
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP1_0]]
; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[TMP2_0]], i32 0
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3_0]]
;
%v0.0 = extractelement <vscale x 2 x i32> %v0, i32 0
%v0.1 = extractelement <vscale x 2 x i32> %v0, i32 1
%tmp0.0 = add i32 %v0.0, %v0.0
%tmp1.0 = sub i32 %v0.0, %v0.1
%tmp2.0 = add i32 %tmp0.0, %tmp1.0
%tmp3.0 = insertelement <vscale x 4 x i32> undef, i32 %tmp2.0, i32 0
ret <vscale x 4 x i32> %tmp3.0
}
define <vscale x 4 x i8> @shuffle(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @shuffle(
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT: [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT: [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT: [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT: [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT: [[INS1:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[X0X0]], i32 0
; CHECK-NEXT: [[INS2:%.*]] = insertelement <vscale x 4 x i8> [[INS1]], i8 [[X3X3]], i32 1
; CHECK-NEXT: [[INS3:%.*]] = insertelement <vscale x 4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
; CHECK-NEXT: [[INS4:%.*]] = insertelement <vscale x 4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
; CHECK-NEXT: ret <vscale x 4 x i8> [[INS4]]
;
%x0 = extractelement <4 x i8> %x, i32 0
%x3 = extractelement <4 x i8> %x, i32 3
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%x0x0 = mul i8 %x0, %x0
%x3x3 = mul i8 %x3, %x3
%y1y1 = mul i8 %y1, %y1
%y2y2 = mul i8 %y2, %y2
%ins1 = insertelement <vscale x 4 x i8> poison, i8 %x0x0, i32 0
%ins2 = insertelement <vscale x 4 x i8> %ins1, i8 %x3x3, i32 1
%ins3 = insertelement <vscale x 4 x i8> %ins2, i8 %y1y1, i32 2
%ins4 = insertelement <vscale x 4 x i8> %ins3, i8 %y2y2, i32 3
ret <vscale x 4 x i8> %ins4
}
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)