1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[AVX-512] Use 512-bit vcvtps2ph/vcvtph2ps to implement fp_to_f16/f16_to_fp when F16C and VLX are not supported.

Fixes PR23941.

llvm-svn: 281958
This commit is contained in:
Craig Topper 2016-09-20 05:44:47 +00:00
parent 140d944d08
commit 6651d4f318
4 changed files with 823 additions and 424 deletions

View File

@ -350,7 +350,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// If we are using soft-float, or have neither F16C nor AVX-512 support, then
// lower half float conversions into library calls.
if (Subtarget.useSoftFloat() ||
(!Subtarget.hasF16C() && !Subtarget.hasVLX())) {
(!Subtarget.hasF16C() && !Subtarget.hasAVX512())) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}

View File

@ -6303,7 +6303,7 @@ let Predicates = [HasAVX512] in {
}
}
// Patterns for matching conversions from float to half-float and vice versa.
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasVLX] in {
// Use MXCSR.RC for rounding instead of explicitly specifying the default
// rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
@ -6323,6 +6323,35 @@ let Predicates = [HasVLX] in {
(VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
}
// Patterns for matching conversions between float and half-float (in both
// directions) when AVX512 is supported but neither VLX nor F16C is. In that
// case we have to use 512-bit vectors: the scalar is placed in the low XMM
// sub-register of an otherwise undefined (IMPLICIT_DEF) wide vector and only
// the low XMM sub-register of the result is used.
let Predicates = [HasAVX512, NoVLX, NoF16C] in {
// float -> half: insert the FR32X source into the low XMM of an undefined
// v16f32, convert with VCVTPS2PHZrr, take the low XMM of the result, pass it
// through VMOVPDI2DIZrr and keep the sub_16bit part as the i16 result.
// The immediate 4 is the same one used by the HasVLX patterns, whose comment
// describes it as using MXCSR.RC for rounding.
// NOTE(review): VMOVPDI2DIZrr is presumably the vector-to-GPR dword move;
// confirm against its definition.
def : Pat<(fp_to_f16 FR32X:$src),
(i16 (EXTRACT_SUBREG
(VMOVPDI2DIZrr
(v8i16 (EXTRACT_SUBREG
(VCVTPS2PHZrr
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
sub_xmm), 4), sub_xmm))), sub_16bit))>;
// half -> float: widen the GR16 source with MOVSX32rr16, copy it into VR128X,
// insert that into the low XMM of an undefined v16i16, convert with
// VCVTPH2PSZrr, and return the low XMM of the result as an FR32X scalar.
def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS
(v4f32 (EXTRACT_SUBREG
(VCVTPH2PSZrr
(INSERT_SUBREG (v16i16 (IMPLICIT_DEF)),
(v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
sub_xmm)), sub_xmm)), FR32X))>;
// Round trip float -> half -> float: feed the VCVTPS2PHZrr result directly
// into VCVTPH2PSZrr, skipping the VMOVPDI2DIZrr / MOVSX32rr16 steps that the
// two separate patterns above would otherwise introduce.
def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
(f32 (COPY_TO_REGCLASS
(v4f32 (EXTRACT_SUBREG
(VCVTPH2PSZrr
(VCVTPS2PHZrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
(v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
sub_xmm), 4)), sub_xmm)), FR32X))>;
}
// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, SDNode OpNode,
string OpcodeStr> {

View File

@ -832,6 +832,7 @@ def HasTBM : Predicate<"Subtarget->hasTBM()">;
// Subtarget feature predicates; each wraps the C++ condition string that is
// evaluated against the current Subtarget.
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
// Negation of HasF16C, for patterns that must only apply when F16C is absent.
def NoF16C : Predicate<"!Subtarget->hasF16C()">;
def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;

File diff suppressed because it is too large Load Diff