1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[X86][AVX512] Change VCVTSS2SD and VCVTSD2SS node types to keep consistency between VEX/EVEX versions.

AVX versions of the converts work on f32/f64 types, while the AVX512 versions work on vectors.

Differential Revision: https://reviews.llvm.org/D29988

llvm-svn: 295940
This commit is contained in:
Ayman Musa 2017-02-23 07:24:21 +00:00
parent 6b427f77ee
commit b129f93b10
4 changed files with 102 additions and 54 deletions

View File

@ -259,6 +259,23 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
// Similar to AVX512_maskable_common, but with scalar types.
// Forwards its operand lists and assembly strings to AVX512_maskable_custom
// and passes three empty lists ([], [], []) in the positions that follow the
// assembly strings -- presumably the unmasked, masking and zero-masking
// patterns; confirm against AVX512_maskable_custom.
// NOTE(review): the Select and itin parameters are accepted here but are not
// referenced in the forwarding below; NoItinerary is passed regardless of the
// itin argument. Confirm whether that is intentional.
multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs,
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
SDNode Select = vselect,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0,
bit IsKCommutable = 0> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[], [], [],
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
@ -291,6 +308,18 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
(X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
X86selects, "$src0 = $dst", itin, IsCommutable>;
// Similar to AVX512_maskable_scalar, but with scalar types.
// Derives the masked operand lists from Ins and forwards everything to
// AVX512_maskable_fp_common:
//  - MaskingIns prepends (_.FRC:$src0, _.KRCWM:$mask) to Ins.
//  - ZeroMaskingIns prepends only (_.KRCWM:$mask) to Ins.
// X86selects is passed as the Select argument and "$src0 = $dst" as the
// masking constraint, tying the extra $src0 operand to the destination.
multiclass AVX512_maskable_fp_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> :
AVX512_maskable_fp_common<O, F, _, Outs, Ins,
!con((ins _.FRC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm,
X86selects, "$src0 = $dst", itin, IsCommutable>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
@ -6030,27 +6059,40 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode> {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
let isCodeGenOnly = 1 in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT (scalar_to_vector
(_Src.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
defm rr : AVX512_maskable_fp_scalar<opc, MRMSrcReg, _, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.FRC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2">,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
defm rm : AVX512_maskable_fp_scalar<opc, MRMSrcMem, _, (outs _.FRC:$dst),
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2">,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@ -6062,7 +6104,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@ -6095,39 +6137,36 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
X86fpextRnd,f32x_info, f64x_info >;
def : Pat<(f64 (fpextend FR32X:$src)),
(COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
(COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
(VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
(COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (extloadf32 addr:$src)),
(COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
def : Pat<(f64 (extloadf32 addr:$src)),
(COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Requires<[HasAVX512, OptForSpeed]>;
def : Pat<(f32 (fpround FR64X:$src)),
(COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
(COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
(VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
(VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
(f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
(VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//

View File

@ -1851,6 +1851,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
{ X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
{ X86::VCVTSS2SDZrr, X86::VCVTSS2SDZrm, 0 },
{ X86::VCVTSS2SDZrr_Int, X86::VCVTSS2SDZrm_Int, TB_NO_REVERSE },
{ X86::VCVTSD2SSZrr, X86::VCVTSD2SSZrm, 0 },
{ X86::VCVTSD2SSZrr_Int, X86::VCVTSD2SSZrm_Int, TB_NO_REVERSE },
{ X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
{ X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
{ X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
@ -8165,11 +8169,15 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
case X86::VCVTSD2SSZrr:
case X86::VCVTSD2SSZrrb:
case X86::VCVTSD2SSZrr_Int:
case X86::VCVTSD2SSZrrb_Int:
case X86::VCVTSD2SSZrm:
case X86::VCVTSD2SSZrm_Int:
case X86::VCVTSS2SDZrr:
case X86::VCVTSS2SDZrrb:
case X86::VCVTSS2SDZrr_Int:
case X86::VCVTSS2SDZrrb_Int:
case X86::VCVTSS2SDZrm:
case X86::VCVTSS2SDZrm_Int:
case X86::VRNDSCALESDr:
case X86::VRNDSCALESDrb:
case X86::VRNDSCALESDm:

View File

@ -1716,20 +1716,21 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
// Convert scalar double to scalar single
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2F]>, VEX_WIG;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
}
def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
def : Pat<(f32 (fpround FR64:$src)),
(VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@ -1781,14 +1782,14 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
// SSE2 instructions with XS prefix
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
Sched<[WriteCvtF2F]>, VEX_WIG;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
@ -1796,15 +1797,15 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
}
def : Pat<(f64 (fpextend FR32:$src)),
(VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
(VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
(VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
Requires<[UseAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),

View File

@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
;
; Half to Float
@ -1941,25 +1941,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
; AVX1-LABEL: cvt_8i16_to_8f64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %xmm0, %rdx
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movl %edx, %r10d
; AVX1-NEXT: movswl %dx, %r8d
; AVX1-NEXT: movswl %dx, %r9d
; AVX1-NEXT: shrq $48, %rdx
; AVX1-NEXT: shrq $32, %r9
; AVX1-NEXT: shrq $32, %r8
; AVX1-NEXT: shrl $16, %r10d
; AVX1-NEXT: vpextrq $1, %xmm0, %rdi
; AVX1-NEXT: movq %rdi, %rsi
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: movl %edi, %esi
; AVX1-NEXT: movswl %di, %ecx
; AVX1-NEXT: shrq $48, %rdi
; AVX1-NEXT: shrq $32, %rsi
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: cwtl
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: shrq $32, %rax
; AVX1-NEXT: shrl $16, %esi
; AVX1-NEXT: movswl %si, %esi
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm1
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm2
; AVX1-NEXT: movswl %si, %eax
; AVX1-NEXT: cwtl
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm3
; AVX1-NEXT: movswl %di, %eax
@ -1968,9 +1968,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
; AVX1-NEXT: movswl %r10w, %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX1-NEXT: vmovd %r8d, %xmm5
; AVX1-NEXT: vmovd %r9d, %xmm5
; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5
; AVX1-NEXT: movswl %r9w, %eax
; AVX1-NEXT: movswl %r8w, %eax
; AVX1-NEXT: vmovd %eax, %xmm6
; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6
; AVX1-NEXT: movswl %dx, %eax
@ -1995,25 +1995,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
; AVX2-LABEL: cvt_8i16_to_8f64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %xmm0, %rdx
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movl %edx, %r10d
; AVX2-NEXT: movswl %dx, %r8d
; AVX2-NEXT: movswl %dx, %r9d
; AVX2-NEXT: shrq $48, %rdx
; AVX2-NEXT: shrq $32, %r9
; AVX2-NEXT: shrq $32, %r8
; AVX2-NEXT: shrl $16, %r10d
; AVX2-NEXT: vpextrq $1, %xmm0, %rdi
; AVX2-NEXT: movq %rdi, %rsi
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movl %edi, %esi
; AVX2-NEXT: movswl %di, %ecx
; AVX2-NEXT: shrq $48, %rdi
; AVX2-NEXT: shrq $32, %rsi
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: cwtl
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: shrq $32, %rax
; AVX2-NEXT: shrl $16, %esi
; AVX2-NEXT: movswl %si, %esi
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm1
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
; AVX2-NEXT: movswl %si, %eax
; AVX2-NEXT: cwtl
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm3
; AVX2-NEXT: movswl %di, %eax
@ -2022,9 +2022,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
; AVX2-NEXT: movswl %r10w, %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX2-NEXT: vmovd %r8d, %xmm5
; AVX2-NEXT: vmovd %r9d, %xmm5
; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5
; AVX2-NEXT: movswl %r9w, %eax
; AVX2-NEXT: movswl %r8w, %eax
; AVX2-NEXT: vmovd %eax, %xmm6
; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6
; AVX2-NEXT: movswl %dx, %eax