[AArch64][SVE] Add fixed length codegen for FP_TO_{S,U}INT/{S,U}INT_TO_FP

Depends on D102607

Differential Revision: https://reviews.llvm.org/D102777

commit 1d41093ecb
parent af1a311083
@@ -1482,6 +1482,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   setOperationAction(ISD::FNEG, VT, Custom);
   setOperationAction(ISD::FP_EXTEND, VT, Custom);
   setOperationAction(ISD::FP_ROUND, VT, Custom);
+  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
   setOperationAction(ISD::FRINT, VT, Custom);
   setOperationAction(ISD::FROUND, VT, Custom);
   setOperationAction(ISD::FROUNDEVEN, VT, Custom);
@@ -1501,6 +1503,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   setOperationAction(ISD::SHL, VT, Custom);
   setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
   setOperationAction(ISD::SMAX, VT, Custom);
   setOperationAction(ISD::SMIN, VT, Custom);
   setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@@ -1510,6 +1513,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   setOperationAction(ISD::SUB, VT, Custom);
   setOperationAction(ISD::TRUNCATE, VT, Custom);
   setOperationAction(ISD::UDIV, VT, Custom);
+  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
   setOperationAction(ISD::UMAX, VT, Custom);
   setOperationAction(ISD::UMIN, VT, Custom);
   setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
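With these operations marked Custom, fixed-length fptosi/fptoui (ISD::FP_TO_SINT/FP_TO_UINT) and sitofp/uitofp (ISD::SINT_TO_FP/UINT_TO_FP) on vector types that map onto SVE now reach the custom lowering added further down instead of being expanded. A minimal illustrative snippet, not taken from this patch's tests; the function name and the 256-bit-or-wider vector-length assumption are mine:

; Compile with something like: llc -mattr=+sve -aarch64-sve-vector-bits-min=256
; <8 x float> is wider than a NEON register, so it is mapped onto an SVE
; container and the conversion goes through the new custom lowering.
define <8 x i32> @example_fptosi(<8 x float> %op) {
  %res = fptosi <8 x float> %op to <8 x i32>
  ret <8 x i32> %res
}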
@@ -3260,6 +3264,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
     return LowerToPredicatedOp(Op, DAG, Opcode);
   }
 
+  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
+    return LowerFixedLengthFPToIntToSVE(Op, DAG);
+
   unsigned NumElts = InVT.getVectorNumElements();
 
   // f16 conversions are promoted to f32 when full fp16 is not supported.
@@ -3384,6 +3391,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
     return LowerToPredicatedOp(Op, DAG, Opcode);
   }
 
+  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
+    return LowerFixedLengthIntToFPToSVE(Op, DAG);
+
   uint64_t VTSize = VT.getFixedSizeInBits();
   uint64_t InVTSize = InVT.getFixedSizeInBits();
   if (VTSize < InVTSize) {
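Note that both guards test the result type (VT) and the operand type (InVT), so the SVE path is also taken when only the operand is wider than NEON. A hedged example of that case, illustrative only; the function name and the 256-bit vector-length assumption are mine:

; Compile with something like: llc -mattr=+sve -aarch64-sve-vector-bits-min=256
; The <4 x i32> result fits in NEON, but the <4 x double> operand does not,
; so the useSVEForFixedLengthVectorVT(InVT) check is what routes this through
; LowerFixedLengthFPToIntToSVE.
define <4 x i32> @example_fptoui(<4 x double> %op) {
  %res = fptoui <4 x double> %op to <4 x i32>
  ret <4 x i32> %res
}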
@@ -17994,6 +18004,95 @@ AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
   return DAG.getNode(ISD::BITCAST, DL, VT, Val);
 }
 
+SDValue
+AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
+  unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
+                             : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
+
+  SDLoc DL(Op);
+  SDValue Val = Op.getOperand(0);
+  EVT SrcVT = Val.getValueType();
+  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
+  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
+
+  if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
+      ContainerDstVT.getVectorElementType().getSizeInBits()) {
+    SDValue Pg = getPredicateForVector(DAG, DL, VT);
+
+    Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
+                      VT.changeTypeToInteger(), Val);
+
+    Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
+    Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
+    // Safe to use a larger than specified operand since we just unpacked the
+    // data, hence the upper bits are zero.
+    Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
+                      DAG.getUNDEF(ContainerDstVT));
+    return convertFromScalableVector(DAG, VT, Val);
+  } else {
+    EVT CvtVT = ContainerSrcVT.changeVectorElementType(
+        ContainerDstVT.getVectorElementType());
+    SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
+
+    Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
+    Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
+    Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
+    Val = convertFromScalableVector(DAG, SrcVT, Val);
+
+    Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
+    return DAG.getNode(ISD::BITCAST, DL, VT, Val);
+  }
+}
+
+SDValue
+AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
+  unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
+                             : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
+
+  SDLoc DL(Op);
+  SDValue Val = Op.getOperand(0);
+  EVT SrcVT = Val.getValueType();
+  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
+  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
+
+  if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
+      ContainerDstVT.getVectorElementType().getSizeInBits()) {
+    EVT CvtVT = ContainerDstVT.changeVectorElementType(
+        ContainerSrcVT.getVectorElementType());
+    SDValue Pg = getPredicateForVector(DAG, DL, VT);
+
+    Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
+    Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
+
+    Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
+    Val = getSVESafeBitCast(CvtVT, Val, DAG);
+    Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
+                      DAG.getUNDEF(ContainerDstVT));
+    return convertFromScalableVector(DAG, VT, Val);
+  } else {
+    EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
+    SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
+
+    // Safe to use a larger than specified result since an fp_to_int where the
+    // result doesn't fit into the destination is undefined.
+    Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
+    Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
+    Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
+
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
+  }
+}
+
 SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc DL(Op);
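The two new helpers split each conversion into a widening and a narrowing case: when the source element is no wider than the destination element, the input is extended to the destination element width first (sign/zero extend for int-to-fp, bitcast plus any-extend for fp-to-int) and the predicated SVE convert runs in the wider container; otherwise the convert runs at the source element width and the result is truncated afterwards. Two illustrative IR functions, assuming a 256-bit-or-wider SVE vector length; the function names are invented for the example and are not taken from the new test files:

; Compile with something like: llc -mattr=+sve -aarch64-sve-vector-bits-min=256

; Widening case: the i16 elements are sign-extended to i32 before the
; predicated convert produces f32 results.
define <8 x float> @example_sitofp_widen(<8 x i16> %op) {
  %res = sitofp <8 x i16> %op to <8 x float>
  ret <8 x float> %res
}

; Narrowing case: the predicated convert produces i64 results from the f64
; elements, which are then truncated to i32.
define <4 x i32> @example_fptosi_narrow(<4 x double> %op) {
  %res = fptosi <4 x double> %op to <4 x i32>
  ret <4 x i32> %res
}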
@@ -1005,6 +1005,8 @@ private:
                                               SelectionDAG &DAG) const;
   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                         SmallVectorImpl<SDNode *> &Created) const override;
@@ -1,70 +0,0 @@
-; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
-; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s
-; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s
-
-target triple = "aarch64-unknown-linux-gnu"
-
-; Don't use SVE when its registers are no bigger than NEON.
-; NO_SVE-NOT: z{0-9}
-
-; NOTE: fptrunc operations bigger than NEON are expanded. These tests just
-; ensure we've correctly set the operation action for fixed length vector types
-; that require SVE. They'll be updated to protect their expected code generation
-; when lowering it implemented.
-
-;
-; vector uint_to_fp i8 -> f32
-; AArch64 doesn't have a direct vector->f32 conversion instructions for
-; elements smaller than i32, so make sure inputs are promoted to i32 first.
-;
-
-define void @uitofp_v4i8_v4f32(<4 x i8>* %in, <4 x float>* %out) #0 {
-; CHECK-LABEL: uitofp_v4i8_v4f32:
-; CHECK-COUNT-1: ucvt
-  %vec = load <4 x i8>, <4 x i8>* %in
-  %conv = uitofp <4 x i8> %vec to <4 x float>
-  store <4 x float> %conv, <4 x float>* %out
-  ret void
-}
-
-define void @uitofp_v8i8_v8f32(<8 x i8>* %in, <8 x float>* %out) #0 {
-; CHECK-LABEL: uitofp_v8i8_v8f32:
-; CHECK-COUNT-8: ucvt
-  %vec = load <8 x i8>, <8 x i8>* %in
-  %conv = uitofp <8 x i8> %vec to <8 x float>
-  store <8 x float> %conv, <8 x float>* %out
-  ret void
-}
-
-define void @uitofp_v16i8_v16f32(<16 x i8>* %in, <16 x float>* %out) #0 {
-; CHECK-LABEL: uitofp_v16i8_v16f32:
-; CHECK-COUNT-16: ucvt
-  %vec = load <16 x i8>, <16 x i8>* %in
-  %conv = uitofp <16 x i8> %vec to <16 x float>
-  store <16 x float> %conv, <16 x float>* %out
-  ret void
-}
-
-define void @uitofp_v32i8_v32f32(<32 x i8>* %in, <32 x float>* %out) #0 {
-; CHECK-LABEL: uitofp_v32i8_v32f32:
-; CHECK-COUNT-32: ucvt
-  %vec = load <32 x i8>, <32 x i8>* %in
-  %conv = uitofp <32 x i8> %vec to <32 x float>
-  store <32 x float> %conv, <32 x float>* %out
-  ret void
-}
-
-attributes #0 = { nounwind "target-features"="+sve" }
test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll (new file, 1761 lines; diff suppressed because it is too large)
test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll (new file, 1759 lines; diff suppressed because it is too large)