mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[SVE][CodeGen] Fix legalisation for scalable types
Summary: This patch handles illegal scalable types when lowering IR operations, addressing several places where the value of isScalableVector() is ignored. For types such as <vscale x 8 x i32>, this means splitting the operations. In this example, we would split the operation into two operations of type <vscale x 4 x i32>, one for the low half and one for the high half. In cases such as <vscale x 2 x i32>, the elements in the vector will be promoted. In this case they will be promoted to i64 (giving a vector of type <vscale x 2 x i64>). Reviewers: sdesmalen, efriedma, huntergr Reviewed By: efriedma Subscribers: david-arm, tschuett, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78812
This commit is contained in:
parent
18a0cecf64
commit
ea6936aebd
@ -731,10 +731,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
|
||||
IntermediateVT.getVectorNumElements() : 1;
|
||||
|
||||
// Convert the vector to the appropriate type if necessary.
|
||||
unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
|
||||
|
||||
auto DestEltCnt = ElementCount(NumIntermediates * IntermediateNumElts,
|
||||
ValueVT.isScalableVector());
|
||||
EVT BuiltVectorTy = EVT::getVectorVT(
|
||||
*DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
|
||||
*DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt);
|
||||
if (ValueVT != BuiltVectorTy) {
|
||||
if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
|
||||
Val = Widened;
|
||||
|
@ -1392,7 +1392,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
|
||||
EVT &IntermediateVT,
|
||||
unsigned &NumIntermediates,
|
||||
MVT &RegisterVT) const {
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
ElementCount EltCnt = VT.getVectorElementCount();
|
||||
|
||||
// If there is a wider vector type with the same element type as this one,
|
||||
// or a promoted vector type that has the same number of elements which
|
||||
@ -1400,7 +1400,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
|
||||
// This handles things like <2 x float> -> <4 x float> and
|
||||
// <4 x i1> -> <4 x i32>.
|
||||
LegalizeTypeAction TA = getTypeAction(Context, VT);
|
||||
if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
|
||||
if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
|
||||
EVT RegisterEVT = getTypeToTransformTo(Context, VT);
|
||||
if (isTypeLegal(RegisterEVT)) {
|
||||
IntermediateVT = RegisterEVT;
|
||||
@ -1417,22 +1417,22 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
|
||||
|
||||
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
|
||||
// could break down into LHS/RHS like LegalizeDAG does.
|
||||
if (!isPowerOf2_32(NumElts)) {
|
||||
NumVectorRegs = NumElts;
|
||||
NumElts = 1;
|
||||
if (!isPowerOf2_32(EltCnt.Min)) {
|
||||
NumVectorRegs = EltCnt.Min;
|
||||
EltCnt.Min = 1;
|
||||
}
|
||||
|
||||
// Divide the input until we get to a supported size. This will always
|
||||
// end with a scalar if the target doesn't support vectors.
|
||||
while (NumElts > 1 && !isTypeLegal(
|
||||
EVT::getVectorVT(Context, EltTy, NumElts))) {
|
||||
NumElts >>= 1;
|
||||
while (EltCnt.Min > 1 &&
|
||||
!isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
|
||||
EltCnt.Min >>= 1;
|
||||
NumVectorRegs <<= 1;
|
||||
}
|
||||
|
||||
NumIntermediates = NumVectorRegs;
|
||||
|
||||
EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
|
||||
EVT NewVT = EVT::getVectorVT(Context, EltTy, EltCnt);
|
||||
if (!isTypeLegal(NewVT))
|
||||
NewVT = EltTy;
|
||||
IntermediateVT = NewVT;
|
||||
|
@ -22,6 +22,37 @@ define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
|
||||
ret <vscale x 2 x i64> %div
|
||||
}
|
||||
|
||||
; Signed division on <vscale x 8 x i32>, a scalable type that has to be split.
; The expected output is two 32-bit (.s) sdiv instructions, on register pairs
; z0/z2 and z1/z3, both governed by one ptrue p0.s predicate.
define <vscale x 8 x i32> @sdiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
|
||||
; CHECK-LABEL: @sdiv_split_i32
|
||||
; CHECK-DAG: ptrue p0.s
|
||||
; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z2.s
|
||||
; CHECK-DAG: sdiv z1.s, p0/m, z1.s, z3.s
|
||||
; CHECK-NEXT: ret
|
||||
%div = sdiv <vscale x 8 x i32> %a, %b
|
||||
ret <vscale x 8 x i32> %div
|
||||
}
|
||||
|
||||
; Signed division on <vscale x 2 x i32>, whose elements are promoted to i64.
; The expected output sign-extends both operands from 32 to 64 bits (sxtw)
; and then performs a single 64-bit (.d) sdiv under a ptrue p0.d predicate.
define <vscale x 2 x i32> @sdiv_widen_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: @sdiv_widen_i32
|
||||
; CHECK-DAG: ptrue p0.d
|
||||
; CHECK-DAG: sxtw z1.d, p0/m, z1.d
|
||||
; CHECK-DAG: sxtw z0.d, p0/m, z0.d
|
||||
; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%div = sdiv <vscale x 2 x i32> %a, %b
|
||||
ret <vscale x 2 x i32> %div
|
||||
}
|
||||
|
||||
; Signed division on <vscale x 4 x i64>, a scalable type that has to be split.
; The expected output is two 64-bit (.d) sdiv instructions, on register pairs
; z0/z2 and z1/z3, both governed by one ptrue p0.d predicate.
define <vscale x 4 x i64> @sdiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
|
||||
; CHECK-LABEL: @sdiv_split_i64
|
||||
; CHECK-DAG: ptrue p0.d
|
||||
; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z2.d
|
||||
; CHECK-DAG: sdiv z1.d, p0/m, z1.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%div = sdiv <vscale x 4 x i64> %a, %b
|
||||
ret <vscale x 4 x i64> %div
|
||||
}
|
||||
|
||||
;
|
||||
; UDIV
|
||||
;
|
||||
@ -44,6 +75,37 @@ define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
|
||||
ret <vscale x 2 x i64> %div
|
||||
}
|
||||
|
||||
; Unsigned division on <vscale x 8 x i32>, a scalable type that has to be
; split. The expected output is two 32-bit (.s) udiv instructions, on register
; pairs z0/z2 and z1/z3, both governed by one ptrue p0.s predicate.
define <vscale x 8 x i32> @udiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
|
||||
; CHECK-LABEL: @udiv_split_i32
|
||||
; CHECK-DAG: ptrue p0.s
|
||||
; CHECK-DAG: udiv z0.s, p0/m, z0.s, z2.s
|
||||
; CHECK-DAG: udiv z1.s, p0/m, z1.s, z3.s
|
||||
; CHECK-NEXT: ret
|
||||
%div = udiv <vscale x 8 x i32> %a, %b
|
||||
ret <vscale x 8 x i32> %div
|
||||
}
|
||||
|
||||
; Unsigned division on <vscale x 2 x i32>, whose elements are promoted to i64.
; The expected output masks both operands with 0xffffffff (keeping only the
; low 32 bits of each 64-bit lane) and then performs a single 64-bit (.d)
; udiv under a ptrue p0.d predicate.
define <vscale x 2 x i32> @udiv_widen_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: @udiv_widen_i32
|
||||
; CHECK-DAG: ptrue p0.d
|
||||
; CHECK-DAG: and z1.d, z1.d, #0xffffffff
|
||||
; CHECK-DAG: and z0.d, z0.d, #0xffffffff
|
||||
; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%div = udiv <vscale x 2 x i32> %a, %b
|
||||
ret <vscale x 2 x i32> %div
|
||||
}
|
||||
|
||||
; Unsigned division on <vscale x 4 x i64>, a scalable type that has to be
; split. The expected output is two 64-bit (.d) udiv instructions, on register
; pairs z0/z2 and z1/z3, both governed by one ptrue p0.d predicate.
define <vscale x 4 x i64> @udiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
|
||||
; CHECK-LABEL: @udiv_split_i64
|
||||
; CHECK-DAG: ptrue p0.d
|
||||
; CHECK-DAG: udiv z0.d, p0/m, z0.d, z2.d
|
||||
; CHECK-DAG: udiv z1.d, p0/m, z1.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%div = udiv <vscale x 4 x i64> %a, %b
|
||||
ret <vscale x 4 x i64> %div
|
||||
}
|
||||
|
||||
;
|
||||
; SMIN
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user