mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[X86][LegalizeTypes] Add SoftPromoteHalf support for STRICT_FP_EXTEND and STRICT_FP_ROUND
This adds strict versions of FP16_TO_FP and FP_TO_FP16 and uses them to implement soft promotion for the half type. This is enough to provide basic support for __fp16 with strictfp. Add the necessary X86 support to use VCVTPS2PH/VCVTPH2PS when F16C is enabled.
This commit is contained in:
parent
2a138c13f8
commit
42881b3a20
@ -633,6 +633,7 @@ namespace ISD {
|
||||
/// form a semi-softened interface for dealing with f16 (as an i16), which
|
||||
/// is often a storage-only type but has native conversions.
|
||||
FP16_TO_FP, FP_TO_FP16,
|
||||
STRICT_FP16_TO_FP, STRICT_FP_TO_FP16,
|
||||
|
||||
/// Perform various unary floating-point operations inspired by libm. For
|
||||
/// FPOWI, the result is undefined if the integer operand doesn't fit
|
||||
|
@ -701,6 +701,8 @@ public:
|
||||
switch (NodeType) {
|
||||
default:
|
||||
return false;
|
||||
case ISD::STRICT_FP16_TO_FP:
|
||||
case ISD::STRICT_FP_TO_FP16:
|
||||
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
|
||||
case ISD::STRICT_##DAGN:
|
||||
#include "llvm/IR/ConstrainedOps.def"
|
||||
|
@ -1009,6 +1009,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
|
||||
Action = TLI.getOperationAction(Node->getOpcode(),
|
||||
Node->getOperand(0).getValueType());
|
||||
break;
|
||||
case ISD::STRICT_FP_TO_FP16:
|
||||
case ISD::STRICT_SINT_TO_FP:
|
||||
case ISD::STRICT_UINT_TO_FP:
|
||||
case ISD::STRICT_LRINT:
|
||||
@ -3272,6 +3273,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
|
||||
}
|
||||
break;
|
||||
case ISD::STRICT_FP16_TO_FP:
|
||||
if (Node->getValueType(0) != MVT::f32) {
|
||||
// We can extend to types bigger than f32 in two steps without changing
|
||||
// the result. Since "f16 -> f32" is much more commonly available, give
|
||||
// CodeGen the option of emitting that before resorting to a libcall.
|
||||
SDValue Res =
|
||||
DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other},
|
||||
{Node->getOperand(0), Node->getOperand(1)});
|
||||
Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
|
||||
{Node->getValueType(0), MVT::Other},
|
||||
{Res.getValue(1), Res});
|
||||
Results.push_back(Res);
|
||||
Results.push_back(Res.getValue(1));
|
||||
}
|
||||
break;
|
||||
case ISD::FP_TO_FP16:
|
||||
LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
|
||||
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
|
||||
@ -4234,6 +4250,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
|
||||
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
|
||||
}
|
||||
break;
|
||||
case ISD::STRICT_FP16_TO_FP: {
|
||||
if (Node->getValueType(0) == MVT::f32) {
|
||||
TargetLowering::MakeLibCallOptions CallOptions;
|
||||
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
|
||||
DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Node->getOperand(1), CallOptions,
|
||||
SDLoc(Node), Node->getOperand(0));
|
||||
Results.push_back(Tmp.first);
|
||||
Results.push_back(Tmp.second);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::FP_TO_FP16: {
|
||||
RTLIB::Libcall LC =
|
||||
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
|
||||
@ -4241,6 +4268,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
|
||||
Results.push_back(ExpandLibCall(LC, Node, false));
|
||||
break;
|
||||
}
|
||||
case ISD::STRICT_FP_TO_FP16: {
|
||||
RTLIB::Libcall LC =
|
||||
RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
|
||||
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
|
||||
"Unable to expand strict_fp_to_fp16");
|
||||
TargetLowering::MakeLibCallOptions CallOptions;
|
||||
std::pair<SDValue, SDValue> Tmp =
|
||||
TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1),
|
||||
CallOptions, SDLoc(Node), Node->getOperand(0));
|
||||
Results.push_back(Tmp.first);
|
||||
Results.push_back(Tmp.second);
|
||||
break;
|
||||
}
|
||||
case ISD::FSUB:
|
||||
case ISD::STRICT_FSUB:
|
||||
ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
|
||||
|
@ -2440,6 +2440,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::EXTRACT_VECTOR_ELT:
|
||||
R = SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(N); break;
|
||||
case ISD::FCOPYSIGN: R = SoftPromoteHalfRes_FCOPYSIGN(N); break;
|
||||
case ISD::STRICT_FP_ROUND:
|
||||
case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break;
|
||||
|
||||
// Unary FP Operations
|
||||
@ -2592,6 +2593,14 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) {
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
|
||||
if (N->isStrictFPOpcode()) {
|
||||
SDValue Res =
|
||||
DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
|
||||
{N->getOperand(0), N->getOperand(1)});
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
return Res;
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0));
|
||||
}
|
||||
|
||||
@ -2701,6 +2710,7 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
|
||||
case ISD::STRICT_FP_EXTEND:
|
||||
case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break;
|
||||
case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
|
||||
case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break;
|
||||
@ -2741,7 +2751,18 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
|
||||
SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
|
||||
bool IsStrict = N->isStrictFPOpcode();
|
||||
SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
|
||||
|
||||
if (IsStrict) {
|
||||
SDValue Res =
|
||||
DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
|
||||
{N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
ReplaceValueWith(SDValue(N, 0), Res);
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op);
|
||||
}
|
||||
|
||||
|
@ -342,7 +342,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::BITCAST: return "bitcast";
|
||||
case ISD::ADDRSPACECAST: return "addrspacecast";
|
||||
case ISD::FP16_TO_FP: return "fp16_to_fp";
|
||||
case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp";
|
||||
case ISD::FP_TO_FP16: return "fp_to_fp16";
|
||||
case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
|
||||
case ISD::LROUND: return "lround";
|
||||
case ISD::STRICT_LROUND: return "strict_lround";
|
||||
case ISD::LLROUND: return "llround";
|
||||
|
@ -374,20 +374,30 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
// If we don't have F16C support, then lower half float conversions
|
||||
// into library calls.
|
||||
if (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) {
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
|
||||
} else {
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
|
||||
}
|
||||
|
||||
// There's never any support for operations beyond MVT::f32.
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
|
||||
setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f80, Expand);
|
||||
setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f128, Expand);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
|
||||
setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f80, Expand);
|
||||
setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f128, Expand);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
|
||||
@ -20553,29 +20563,64 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) {
|
||||
assert(Op.getOperand(0).getValueType() == MVT::i16 &&
|
||||
Op.getValueType() == MVT::f32 && "Unexpected VT!");
|
||||
bool IsStrict = Op->isStrictFPOpcode();
|
||||
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
|
||||
assert(Src.getValueType() == MVT::i16 && Op.getValueType() == MVT::f32 &&
|
||||
"Unexpected VT!");
|
||||
|
||||
SDLoc dl(Op);
|
||||
SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16,
|
||||
DAG.getConstant(0, dl, MVT::v8i16),
|
||||
Op.getOperand(0), DAG.getIntPtrConstant(0, dl));
|
||||
Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res);
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
DAG.getConstant(0, dl, MVT::v8i16), Src,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
|
||||
SDValue Chain;
|
||||
if (IsStrict) {
|
||||
Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {MVT::v4f32, MVT::Other},
|
||||
{Op.getOperand(0), Res});
|
||||
Chain = Res.getValue(1);
|
||||
} else {
|
||||
Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res);
|
||||
}
|
||||
|
||||
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
|
||||
if (IsStrict)
|
||||
return DAG.getMergeValues({Res, Chain}, dl);
|
||||
|
||||
return Res;
|
||||
}
|
||||
|
||||
static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) {
|
||||
assert(Op.getOperand(0).getValueType() == MVT::f32 &&
|
||||
Op.getValueType() == MVT::i16 && "Unexpected VT!");
|
||||
bool IsStrict = Op->isStrictFPOpcode();
|
||||
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
|
||||
assert(Src.getValueType() == MVT::f32 && Op.getValueType() == MVT::i16 &&
|
||||
"Unexpected VT!");
|
||||
|
||||
SDLoc dl(Op);
|
||||
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32,
|
||||
Op.getOperand(0));
|
||||
Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res,
|
||||
DAG.getTargetConstant(4, dl, MVT::i32));
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Res,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
SDValue Res, Chain;
|
||||
if (IsStrict) {
|
||||
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4f32,
|
||||
DAG.getConstantFP(0, dl, MVT::v4f32), Src,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
Res = DAG.getNode(
|
||||
X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other},
|
||||
{Op.getOperand(0), Res, DAG.getTargetConstant(4, dl, MVT::i32)});
|
||||
Chain = Res.getValue(1);
|
||||
} else {
|
||||
// FIXME: Should we use zeros for upper elements for non-strict?
|
||||
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, Src);
|
||||
Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res,
|
||||
DAG.getTargetConstant(4, dl, MVT::i32));
|
||||
}
|
||||
|
||||
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Res,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
|
||||
if (IsStrict)
|
||||
return DAG.getMergeValues({Res, Chain}, dl);
|
||||
|
||||
return Res;
|
||||
}
|
||||
|
||||
/// Depending on uarch and/or optimizing for size, we might prefer to use a
|
||||
@ -28821,8 +28866,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
|
||||
case ISD::FP_ROUND:
|
||||
case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG);
|
||||
case ISD::FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG);
|
||||
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
|
||||
case ISD::FP16_TO_FP:
|
||||
case ISD::STRICT_FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG);
|
||||
case ISD::FP_TO_FP16:
|
||||
case ISD::STRICT_FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
|
||||
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
|
||||
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
|
||||
case ISD::FADD:
|
||||
@ -30162,8 +30209,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(SCALAR_UINT_TO_FP)
|
||||
NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
|
||||
NODE_NAME_CASE(CVTPS2PH)
|
||||
NODE_NAME_CASE(STRICT_CVTPS2PH)
|
||||
NODE_NAME_CASE(MCVTPS2PH)
|
||||
NODE_NAME_CASE(CVTPH2PS)
|
||||
NODE_NAME_CASE(STRICT_CVTPH2PS)
|
||||
NODE_NAME_CASE(CVTPH2PS_SAE)
|
||||
NODE_NAME_CASE(CVTP2SI)
|
||||
NODE_NAME_CASE(CVTP2UI)
|
||||
|
@ -627,6 +627,9 @@ namespace llvm {
|
||||
// Strict FMA nodes.
|
||||
STRICT_FNMADD, STRICT_FMSUB, STRICT_FNMSUB,
|
||||
|
||||
// Conversions between float and half-float.
|
||||
STRICT_CVTPS2PH, STRICT_CVTPH2PS,
|
||||
|
||||
// Compare and swap.
|
||||
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||
LCMPXCHG8_DAG,
|
||||
|
@ -8568,14 +8568,15 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in
|
||||
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
|
||||
X86MemOperand x86memop, PatFrag ld_frag,
|
||||
X86FoldableSchedWrite sched> {
|
||||
defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
|
||||
defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
|
||||
(ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
|
||||
(X86any_cvtph2ps (_src.VT _src.RC:$src)),
|
||||
(X86cvtph2ps (_src.VT _src.RC:$src))>,
|
||||
T8PD, Sched<[sched]>;
|
||||
defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
|
||||
defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
|
||||
(ins x86memop:$src), "vcvtph2ps", "$src", "$src",
|
||||
(X86cvtph2ps (_src.VT
|
||||
(ld_frag addr:$src)))>,
|
||||
(X86any_cvtph2ps (_src.VT (ld_frag addr:$src))),
|
||||
(X86cvtph2ps (_src.VT (ld_frag addr:$src)))>,
|
||||
T8PD, Sched<[sched.Folded]>;
|
||||
}
|
||||
|
||||
@ -8604,9 +8605,9 @@ let Predicates = [HasVLX] in {
|
||||
EVEX_CD8<32, CD8VH>;
|
||||
|
||||
// Pattern match vcvtph2ps of a scalar i64 load.
|
||||
def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
|
||||
def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
|
||||
(VCVTPH2PSZ128rm addr:$src)>;
|
||||
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
|
||||
def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
|
||||
(VCVTPH2PSZ128rm addr:$src)>;
|
||||
}
|
||||
@ -8618,7 +8619,7 @@ let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
|
||||
(ins _src.RC:$src1, i32u8imm:$src2),
|
||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set _dest.RC:$dst,
|
||||
(X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
|
||||
(X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
|
||||
Sched<[RR]>;
|
||||
let Constraints = "$src0 = $dst" in
|
||||
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
|
||||
@ -8673,16 +8674,16 @@ let Predicates = [HasAVX512] in {
|
||||
}
|
||||
|
||||
def : Pat<(store (f64 (extractelt
|
||||
(bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
|
||||
(bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
|
||||
(iPTR 0))), addr:$dst),
|
||||
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
|
||||
def : Pat<(store (i64 (extractelt
|
||||
(bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
|
||||
(bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
|
||||
(iPTR 0))), addr:$dst),
|
||||
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
|
||||
def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
|
||||
def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
|
||||
(VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
|
||||
def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
|
||||
def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
|
||||
(VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
|
||||
}
|
||||
|
||||
|
@ -721,19 +721,27 @@ def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
|
||||
def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
|
||||
def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
|
||||
|
||||
def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
|
||||
SDTCVecEltisVT<1, i16>]>;
|
||||
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>;
|
||||
def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps,
|
||||
[SDNPHasChain]>;
|
||||
def X86any_cvtph2ps : PatFrags<(ops node:$src),
|
||||
[(X86strict_cvtph2ps node:$src),
|
||||
(X86cvtph2ps node:$src)]>;
|
||||
|
||||
def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
|
||||
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
|
||||
SDTCVecEltisVT<1, i16>]> >;
|
||||
def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", SDTcvtph2ps>;
|
||||
|
||||
def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE",
|
||||
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
|
||||
SDTCVecEltisVT<1, i16>]> >;
|
||||
def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
|
||||
SDTCVecEltisVT<1, f32>,
|
||||
SDTCisVT<2, i32>]>;
|
||||
def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>;
|
||||
def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph,
|
||||
[SDNPHasChain]>;
|
||||
def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2),
|
||||
[(X86strict_cvtps2ph node:$src1, node:$src2),
|
||||
(X86cvtps2ph node:$src1, node:$src2)]>;
|
||||
|
||||
def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
|
||||
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
|
||||
SDTCVecEltisVT<1, f32>,
|
||||
SDTCisVT<2, i32>]> >;
|
||||
def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
|
||||
SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
|
||||
SDTCVecEltisVT<1, f32>,
|
||||
|
@ -7337,12 +7337,12 @@ multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop,
|
||||
X86FoldableSchedWrite sched> {
|
||||
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
|
||||
"vcvtph2ps\t{$src, $dst|$dst, $src}",
|
||||
[(set RC:$dst, (X86cvtph2ps VR128:$src))]>,
|
||||
[(set RC:$dst, (X86any_cvtph2ps VR128:$src))]>,
|
||||
T8PD, VEX, Sched<[sched]>;
|
||||
let hasSideEffects = 0, mayLoad = 1 in
|
||||
def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
||||
"vcvtph2ps\t{$src, $dst|$dst, $src}",
|
||||
[(set RC:$dst, (X86cvtph2ps (loadv8i16 addr:$src)))]>,
|
||||
[(set RC:$dst, (X86any_cvtph2ps (loadv8i16 addr:$src)))]>,
|
||||
T8PD, VEX, Sched<[sched.Folded]>;
|
||||
}
|
||||
|
||||
@ -7351,7 +7351,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
|
||||
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
|
||||
(ins RC:$src1, i32u8imm:$src2),
|
||||
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>,
|
||||
[(set VR128:$dst, (X86any_cvtps2ph RC:$src1, timm:$src2))]>,
|
||||
TAPD, VEX, Sched<[RR]>;
|
||||
let hasSideEffects = 0, mayStore = 1 in
|
||||
def mr : Ii8<0x1D, MRMDestMem, (outs),
|
||||
@ -7369,21 +7369,21 @@ let Predicates = [HasF16C, NoVLX] in {
|
||||
WriteCvtPS2PHYSt>, VEX_L, SIMD_EXC;
|
||||
|
||||
// Pattern match vcvtph2ps of a scalar i64 load.
|
||||
def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
|
||||
def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
|
||||
(VCVTPH2PSrm addr:$src)>;
|
||||
def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
|
||||
def : Pat<(v4f32 (X86any_cvtph2ps (bc_v8i16
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||
(VCVTPH2PSrm addr:$src)>;
|
||||
|
||||
def : Pat<(store (f64 (extractelt
|
||||
(bc_v2f64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))),
|
||||
(bc_v2f64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
|
||||
(iPTR 0))), addr:$dst),
|
||||
(VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
|
||||
def : Pat<(store (i64 (extractelt
|
||||
(bc_v2i64 (v8i16 (X86cvtps2ph VR128:$src1, timm:$src2))),
|
||||
(bc_v2i64 (v8i16 (X86any_cvtps2ph VR128:$src1, timm:$src2))),
|
||||
(iPTR 0))), addr:$dst),
|
||||
(VCVTPS2PHmr addr:$dst, VR128:$src1, timm:$src2)>;
|
||||
def : Pat<(store (v8i16 (X86cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
|
||||
def : Pat<(store (v8i16 (X86any_cvtps2ph VR256:$src1, timm:$src2)), addr:$dst),
|
||||
(VCVTPS2PHYmr addr:$dst, VR256:$src1, timm:$src2)>;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user