1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[CodeGen][SVE] CopyToReg: Split scalable EVTs that are not powers of 2

Scalable vectors cannot use 'BUILD_VECTOR', so it is necessary to
properly split and widen scalable vectors when passing them
to CopyToReg/CopyFromReg.

This functionality is added to TargetLoweringBase::getVectorTypeBreakdown().

This patch only adds support for 'splitting' scalable vectors that
are a multiple of some legal type, e.g.

      <vscale x 6 x i64> -> 3 x <vscale x 2 x i64>

Reviewers: efriedma, c-rhodes

Reviewed By: efriedma

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80139
This commit is contained in:
Sander de Smalen 2020-06-05 18:29:43 +01:00
parent 16c9f00d4f
commit bf414ae934
2 changed files with 346 additions and 2 deletions

View File

@@ -955,6 +955,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned NumVectorRegs = 1;
// Scalable vectors cannot be scalarized, so splitting or widening is
// required.
if (VT.isScalableVector() && !isPowerOf2_32(EC.Min))
llvm_unreachable(
"Splitting or widening of non-power-of-2 MVTs is not implemented.");
// FIXME: We don't support non-power-of-2-sized vectors for now.
// Ideally we could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(EC.Min)) {
@@ -1418,8 +1424,34 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
unsigned NumVectorRegs = 1;
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
// Scalable vectors cannot be scalarized, so handle the legalisation of the
// types like done elsewhere in SelectionDAG.
if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) {
LegalizeKind LK;
EVT PartVT = VT;
do {
// Iterate until we've found a legal (part) type to hold VT.
LK = getTypeConversion(Context, PartVT);
PartVT = LK.second;
} while (LK.first != TypeLegal);
NumIntermediates =
VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min;
// FIXME: This code needs to be extended to handle more complex vector
// breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
// supported cases are vectors that are broken down into equal parts
// such as nxv6i64 -> 3 x nxv2i64.
assert(NumIntermediates * PartVT.getVectorElementCount().Min ==
VT.getVectorElementCount().Min &&
"Expected an integer multiple of PartVT");
IntermediateVT = PartVT;
RegisterVT = getRegisterType(Context, IntermediateVT);
return NumIntermediates;
}
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
// we could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(EltCnt.Min)) {
NumVectorRegs = EltCnt.Min;
EltCnt.Min = 1;

View File

@@ -0,0 +1,312 @@
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
; Test that scalable vectors that are a multiple of the legal vector size
; can be properly broken down into part vectors.
; External function with no arguments and no result; the test functions in
; this file call it on their L1 path.
declare void @bar()
;
; Vectors twice the size
;
; wide_32i8: takes and returns <vscale x 32 x i8>, which has twice the element
; count of the <vscale x 16 x i8> %legal argument. The checks expect the
; returned %illegal value to be copied with two z-register moves
; (z1 into z0, z2 into z1), i.e. as two parts.
define <vscale x 32 x i8> @wide_32i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x i8> %illegal) nounwind {
; CHECK-LABEL: wide_32i8
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 32 x i8> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 32 x i8> %illegal
}
; wide_16i16: takes and returns <vscale x 16 x i16> (from the "Vectors twice
; the size" group of this file). The checks expect the returned %illegal value
; to be copied with two z-register moves (z1 into z0, z2 into z1), i.e. as
; two parts.
define <vscale x 16 x i16> @wide_16i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x i16> %illegal) nounwind {
; CHECK-LABEL: wide_16i16
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 16 x i16> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 16 x i16> %illegal
}
; wide_8i32: takes and returns <vscale x 8 x i32> (from the "Vectors twice
; the size" group of this file). The checks expect the returned %illegal value
; to be copied with two z-register moves (z1 into z0, z2 into z1), i.e. as
; two parts.
define <vscale x 8 x i32> @wide_8i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x i32> %illegal) nounwind {
; CHECK-LABEL: wide_8i32
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 8 x i32> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 8 x i32> %illegal
}
; wide_4i64: takes and returns <vscale x 4 x i64> (from the "Vectors twice
; the size" group of this file). The checks expect the returned %illegal value
; to be copied with two z-register moves (z1 into z0, z2 into z1), i.e. as
; two parts.
define <vscale x 4 x i64> @wide_4i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 4 x i64> %illegal) nounwind {
; CHECK-LABEL: wide_4i64
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 4 x i64> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 4 x i64> %illegal
}
; wide_16f16: floating-point variant; takes and returns <vscale x 16 x half>
; (from the "Vectors twice the size" group of this file). The checks expect
; the returned %illegal value to be copied with two z-register moves
; (z1 into z0, z2 into z1), i.e. as two parts.
define <vscale x 16 x half> @wide_16f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x half> %illegal) nounwind {
; CHECK-LABEL: wide_16f16
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 16 x half> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 16 x half> %illegal
}
; wide_8f32: floating-point variant; takes and returns <vscale x 8 x float>
; (from the "Vectors twice the size" group of this file). The checks expect
; the returned %illegal value to be copied with two z-register moves
; (z1 into z0, z2 into z1), i.e. as two parts.
define <vscale x 8 x float> @wide_8f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x float> %illegal) nounwind {
; CHECK-LABEL: wide_8f32
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 8 x float> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 8 x float> %illegal
}
; wide_4f64: floating-point variant; takes and returns <vscale x 4 x double>
; (from the "Vectors twice the size" group of this file). The checks expect
; the returned %illegal value to be copied with two z-register moves
; (z1 into z0, z2 into z1), i.e. as two parts.
define <vscale x 4 x double> @wide_4f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 4 x double> %illegal) nounwind {
; CHECK-LABEL: wide_4f64
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 4 x double> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 4 x double> %illegal
}
;
; Vectors three times the size
;
; wide_48i8: takes and returns <vscale x 48 x i8>, which has three times the
; element count of the <vscale x 16 x i8> %legal argument (a non-power-of-2
; element count). The checks expect the returned %illegal value to be copied
; with three z-register moves (z1 into z0, z2 into z1, z3 into z2), i.e. as
; three parts.
define <vscale x 48 x i8> @wide_48i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 48 x i8> %illegal) nounwind {
; CHECK-LABEL: wide_48i8
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 48 x i8> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 48 x i8> %illegal
}
; wide_24i16: takes and returns <vscale x 24 x i16> (from the "Vectors three
; times the size" group of this file; 24 is not a power of 2). The checks
; expect the returned %illegal value to be copied with three z-register moves
; (z1 into z0, z2 into z1, z3 into z2), i.e. as three parts.
define <vscale x 24 x i16> @wide_24i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 24 x i16> %illegal) nounwind {
; CHECK-LABEL: wide_24i16
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 24 x i16> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 24 x i16> %illegal
}
; wide_12i32: takes and returns <vscale x 12 x i32> (from the "Vectors three
; times the size" group of this file; 12 is not a power of 2). The checks
; expect the returned %illegal value to be copied with three z-register moves
; (z1 into z0, z2 into z1, z3 into z2), i.e. as three parts.
define <vscale x 12 x i32> @wide_12i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 12 x i32> %illegal) nounwind {
; CHECK-LABEL: wide_12i32
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 12 x i32> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 12 x i32> %illegal
}
; wide_6i64: takes and returns <vscale x 6 x i64> (from the "Vectors three
; times the size" group of this file; 6 is not a power of 2). The checks
; expect the returned %illegal value to be copied with three z-register moves
; (z1 into z0, z2 into z1, z3 into z2), i.e. as three parts.
define <vscale x 6 x i64> @wide_6i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 6 x i64> %illegal) nounwind {
; CHECK-LABEL: wide_6i64
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 6 x i64> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 6 x i64> %illegal
}
; wide_24f16: floating-point variant; takes and returns <vscale x 24 x half>
; (from the "Vectors three times the size" group of this file; 24 is not a
; power of 2). The checks expect the returned %illegal value to be copied with
; three z-register moves (z1 into z0, z2 into z1, z3 into z2), i.e. as three
; parts.
define <vscale x 24 x half> @wide_24f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 24 x half> %illegal) nounwind {
; CHECK-LABEL: wide_24f16
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 24 x half> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 24 x half> %illegal
}
; wide_12f32: floating-point variant; takes and returns <vscale x 12 x float>
; (from the "Vectors three times the size" group of this file; 12 is not a
; power of 2). The checks expect the returned %illegal value to be copied with
; three z-register moves (z1 into z0, z2 into z1, z3 into z2), i.e. as three
; parts.
define <vscale x 12 x float> @wide_12f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 12 x float> %illegal) nounwind {
; CHECK-LABEL: wide_12f32
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 12 x float> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 12 x float> %illegal
}
; wide_6f64: floating-point variant; takes and returns <vscale x 6 x double>
; (from the "Vectors three times the size" group of this file; 6 is not a
; power of 2). The checks expect the returned %illegal value to be copied with
; three z-register moves (z1 into z0, z2 into z1, z3 into z2), i.e. as three
; parts.
define <vscale x 6 x double> @wide_6f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 6 x double> %illegal) nounwind {
; CHECK-LABEL: wide_6f64
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 6 x double> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 6 x double> %illegal
}
;
; Vectors four times the size
;
; wide_64i8: takes and returns <vscale x 64 x i8>, which has four times the
; element count of the <vscale x 16 x i8> %legal argument. The checks expect
; the returned %illegal value to be copied with four z-register moves
; (z1 into z0, z2 into z1, z3 into z2, z4 into z3), i.e. as four parts.
define <vscale x 64 x i8> @wide_64i8(i1 %b, <vscale x 16 x i8> %legal, <vscale x 64 x i8> %illegal) nounwind {
; CHECK-LABEL: wide_64i8
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 64 x i8> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 64 x i8> %illegal
}
; wide_32i16: takes and returns <vscale x 32 x i16> (from the "Vectors four
; times the size" group of this file). The checks expect the returned %illegal
; value to be copied with four z-register moves (z1 into z0, z2 into z1,
; z3 into z2, z4 into z3), i.e. as four parts.
define <vscale x 32 x i16> @wide_32i16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x i16> %illegal) nounwind {
; CHECK-LABEL: wide_32i16
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 32 x i16> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 32 x i16> %illegal
}
; wide_16i32: takes and returns <vscale x 16 x i32> (from the "Vectors four
; times the size" group of this file). The checks expect the returned %illegal
; value to be copied with four z-register moves (z1 into z0, z2 into z1,
; z3 into z2, z4 into z3), i.e. as four parts.
define <vscale x 16 x i32> @wide_16i32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x i32> %illegal) nounwind {
; CHECK-LABEL: wide_16i32
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 16 x i32> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 16 x i32> %illegal
}
; wide_8i64: takes and returns <vscale x 8 x i64> (from the "Vectors four
; times the size" group of this file). The checks expect the returned %illegal
; value to be copied with four z-register moves (z1 into z0, z2 into z1,
; z3 into z2, z4 into z3), i.e. as four parts.
define <vscale x 8 x i64> @wide_8i64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x i64> %illegal) nounwind {
; CHECK-LABEL: wide_8i64
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 8 x i64> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 8 x i64> %illegal
}
; wide_32f16: floating-point variant; takes and returns <vscale x 32 x half>
; (from the "Vectors four times the size" group of this file). The checks
; expect the returned %illegal value to be copied with four z-register moves
; (z1 into z0, z2 into z1, z3 into z2, z4 into z3), i.e. as four parts.
define <vscale x 32 x half> @wide_32f16(i1 %b, <vscale x 16 x i8> %legal, <vscale x 32 x half> %illegal) nounwind {
; CHECK-LABEL: wide_32f16
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 32 x half> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 32 x half> %illegal
}
; wide_16f32: floating-point variant; takes and returns <vscale x 16 x float>
; (from the "Vectors four times the size" group of this file). The checks
; expect the returned %illegal value to be copied with four z-register moves
; (z1 into z0, z2 into z1, z3 into z2, z4 into z3), i.e. as four parts.
define <vscale x 16 x float> @wide_16f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x float> %illegal) nounwind {
; CHECK-LABEL: wide_16f32
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 16 x float> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 16 x float> %illegal
}
; wide_8f64: floating-point variant; takes and returns <vscale x 8 x double>
; (from the "Vectors four times the size" group of this file). The checks
; expect the returned %illegal value to be copied with four z-register moves
; (z1 into z0, z2 into z1, z3 into z2, z4 into z3), i.e. as four parts.
define <vscale x 8 x double> @wide_8f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x double> %illegal) nounwind {
; CHECK-LABEL: wide_8f64
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
; NOTE(review): the branch presumably keeps the use of %illegal out of the
; entry block so the value has to cross a basic-block boundary -- confirm.
br i1 %b, label %L1, label %L2
L1:
; L1 calls @bar and returns undef.
call void @bar()
ret <vscale x 8 x double> undef
L2:
; L2 returns the incoming %illegal argument unchanged.
ret <vscale x 8 x double> %illegal
}