[X86][AVX512] Change VCVTSS2SD and VCVTSD2SS node types to keep consistency between VEX/EVEX versions.
AVX versions of the conversions work on f32/f64 types, while the AVX512 versions work on vectors.

Differential Revision: https://reviews.llvm.org/D29988

llvm-svn: 295940
parent 6b427f77ee
commit b129f93b10
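As context for the patterns changed below, a minimal LLVM IR sketch (function names are illustrative, not from the patch): after this change, plain scalar fpext/fptrunc select the FR32/FR64 register forms of VCVTSS2SD/VCVTSD2SS directly, rather than bouncing through VR128 copies.

define double @extend_f32(float %x) {
  ; expected to select vcvtss2sd per the fpextend patterns below
  %r = fpext float %x to double
  ret double %r
}

define float @round_f64(double %x) {
  ; expected to select vcvtsd2ss per the fpround patterns below
  %r = fptrunc double %x to float
  ret float %r
}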
@@ -259,6 +259,23 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                   MaskingConstraint, NoItinerary, IsCommutable,
                                   IsKCommutable>;
 
+// Similar to AVX512_maskable_common, but with scalar types.
+multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
+                                  dag Outs,
+                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+                                  string OpcodeStr,
+                                  string AttSrcAsm, string IntelSrcAsm,
+                                  SDNode Select = vselect,
+                                  string MaskingConstraint = "",
+                                  InstrItinClass itin = NoItinerary,
+                                  bit IsCommutable = 0,
+                                  bit IsKCommutable = 0> :
+   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+                          AttSrcAsm, IntelSrcAsm,
+                          [], [], [],
+                          MaskingConstraint, NoItinerary, IsCommutable,
+                          IsKCommutable>;
+
 // This multiclass generates the unconditional/non-masking, the masking and
 // the zero-masking variant of the vector instruction. In the masking case, the
 // preserved vector elements come from a new dummy input operand tied to $dst.
@@ -291,6 +308,18 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                            (X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
                            X86selects, "$src0 = $dst", itin, IsCommutable>;
 
+// Similar to AVX512_maskable_scalar, but with scalar types.
+multiclass AVX512_maskable_fp_scalar<bits<8> O, Format F, X86VectorVTInfo _,
+                                  dag Outs, dag Ins, string OpcodeStr,
+                                  string AttSrcAsm, string IntelSrcAsm,
+                                  InstrItinClass itin = NoItinerary,
+                                  bit IsCommutable = 0> :
+   AVX512_maskable_fp_common<O, F, _, Outs, Ins,
+                          !con((ins _.FRC:$src0, _.KRCWM:$mask), Ins),
+                          !con((ins _.KRCWM:$mask), Ins),
+                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
+                          X86selects, "$src0 = $dst", itin, IsCommutable>;
+
 // Similar to AVX512_maskable but in this case one of the source operands
 // ($src1) is already tied to $dst so we just use that for the preserved
 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
@@ -6030,27 +6059,40 @@ let Predicates = [HasAVX512] in {
 //===----------------------------------------------------------------------===//
 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                 X86VectorVTInfo _Src, SDNode OpNode> {
-  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+  let isCodeGenOnly = 1 in {
+  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (_.VT (OpNode (_.VT _.RC:$src1),
                                        (_Src.VT _Src.RC:$src2),
                                        (i32 FROUND_CURRENT)))>,
                          EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
-  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                         (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
+  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins _.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
                          "$src2, $src1", "$src1, $src2",
                          (_.VT (OpNode (_.VT _.RC:$src1),
                                        (_Src.VT (scalar_to_vector
                                                  (_Src.ScalarLdFrag addr:$src2))),
                                        (i32 FROUND_CURRENT)))>,
                          EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+  }
+
+  defm rr : AVX512_maskable_fp_scalar<opc, MRMSrcReg, _, (outs _.FRC:$dst),
+                         (ins _.FRC:$src1, _Src.FRC:$src2), OpcodeStr,
+                         "$src2, $src1", "$src1, $src2">,
+                         EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+  let mayLoad = 1 in
+  defm rm : AVX512_maskable_fp_scalar<opc, MRMSrcMem, _, (outs _.FRC:$dst),
+                         (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
+                         "$src2, $src1", "$src1, $src2">,
+                         EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+
 }
 
 // Scalar Conversion with SAE - suppress all exceptions
 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                     X86VectorVTInfo _Src, SDNode OpNodeRnd> {
-  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                          "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                          (_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6062,7 +6104,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
 // Scalar Conversion with rounding control (RC)
 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeRnd> {
-  defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6095,39 +6137,36 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                          X86fpextRnd,f32x_info, f64x_info >;
 
 def : Pat<(f64 (fpextend FR32X:$src)),
-          (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
-                            (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+          (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
           Requires<[HasAVX512]>;
 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
-          (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[HasAVX512]>;
 
 def : Pat<(f64 (extloadf32 addr:$src)),
-          (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[HasAVX512, OptForSize]>;
 
 def : Pat<(f64 (extloadf32 addr:$src)),
-          (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
-                    (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
           Requires<[HasAVX512, OptForSpeed]>;
 
 def : Pat<(f32 (fpround FR64X:$src)),
-          (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
-                            (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
+          (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
           Requires<[HasAVX512]>;
 
 def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                           (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
-          (VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
+          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
           Requires<[HasAVX512]>;
 
 def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                           (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
-          (VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
+          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
           Requires<[HasAVX512]>;
 
 //===----------------------------------------------------------------------===//
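The intrinsic (_Int) forms above keep the vector types; a sketch of IR that should still select the renamed VCVTSD2SSZrr_Int (the declaration follows the avx512 mask intrinsic of this era; the calling function name is illustrative):

declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x double>, <4 x float>, i8, i32)

define <4 x float> @cvtsd2ss_int(<4 x float> %a, <2 x double> %b) {
  ; mask -1 = unmasked, rounding 4 = CUR_DIRECTION
  %r = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %a, <2 x double> %b, <4 x float> %a, i8 -1, i32 4)
  ret <4 x float> %r
}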
@@ -1851,6 +1851,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
     { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
     { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
+    { X86::VCVTSS2SDZrr, X86::VCVTSS2SDZrm, 0 },
+    { X86::VCVTSS2SDZrr_Int, X86::VCVTSS2SDZrm_Int, TB_NO_REVERSE },
+    { X86::VCVTSD2SSZrr, X86::VCVTSD2SSZrm, 0 },
+    { X86::VCVTSD2SSZrr_Int, X86::VCVTSD2SSZrm_Int, TB_NO_REVERSE },
     { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
     { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
     { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
@@ -8165,11 +8169,15 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
   case X86::VCVTUSI642SDZrrb_Int:
   case X86::VCVTUSI642SDZrm_Int:
   case X86::VCVTSD2SSZrr:
-  case X86::VCVTSD2SSZrrb:
+  case X86::VCVTSD2SSZrr_Int:
+  case X86::VCVTSD2SSZrrb_Int:
   case X86::VCVTSD2SSZrm:
+  case X86::VCVTSD2SSZrm_Int:
   case X86::VCVTSS2SDZrr:
-  case X86::VCVTSS2SDZrrb:
+  case X86::VCVTSS2SDZrr_Int:
+  case X86::VCVTSS2SDZrrb_Int:
   case X86::VCVTSS2SDZrm:
+  case X86::VCVTSS2SDZrm_Int:
   case X86::VRNDSCALESDr:
   case X86::VRNDSCALESDrb:
   case X86::VRNDSCALESDm:
@@ -1716,20 +1716,21 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
 // Convert scalar double to scalar single
 let hasSideEffects = 0, Predicates = [UseAVX] in {
 def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
-                       (ins FR64:$src1, FR64:$src2),
+                       (ins FR32:$src1, FR64:$src2),
                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                        IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
                        Sched<[WriteCvtF2F]>, VEX_WIG;
 let mayLoad = 1 in
 def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
-                    (ins FR64:$src1, f64mem:$src2),
+                    (ins FR32:$src1, f64mem:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RM>,
                     XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
                     Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
 }
 
-def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+def : Pat<(f32 (fpround FR64:$src)),
+          (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
           Requires<[UseAVX]>;
 
 def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@@ -1781,14 +1782,14 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
 // SSE2 instructions with XS prefix
 let hasSideEffects = 0, Predicates = [UseAVX] in {
 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
-                    (ins FR32:$src1, FR32:$src2),
+                    (ins FR64:$src1, FR32:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RR>,
                     XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
                     Sched<[WriteCvtF2F]>, VEX_WIG;
 let mayLoad = 1 in
 def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
-                    (ins FR32:$src1, f32mem:$src2),
+                    (ins FR64:$src1, f32mem:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RM>,
                     XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
@@ -1796,15 +1797,15 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
 }
 
 def : Pat<(f64 (fpextend FR32:$src)),
-          (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
+          (VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
 def : Pat<(fpextend (loadf32 addr:$src)),
-          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
+          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
 
 def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[UseAVX, OptForSize]>;
 def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
           Requires<[UseAVX, OptForSpeed]>;
 
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
 
 ;
 ; Half to Float
@@ -1941,25 +1941,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX1-LABEL: cvt_8i16_to_8f64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovq %xmm0, %rdx
-; AVX1-NEXT:    movq %rdx, %r9
+; AVX1-NEXT:    movq %rdx, %r8
 ; AVX1-NEXT:    movl %edx, %r10d
-; AVX1-NEXT:    movswl %dx, %r8d
+; AVX1-NEXT:    movswl %dx, %r9d
 ; AVX1-NEXT:    shrq $48, %rdx
-; AVX1-NEXT:    shrq $32, %r9
+; AVX1-NEXT:    shrq $32, %r8
 ; AVX1-NEXT:    shrl $16, %r10d
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX1-NEXT:    movq %rdi, %rsi
-; AVX1-NEXT:    movl %edi, %eax
+; AVX1-NEXT:    movq %rdi, %rax
+; AVX1-NEXT:    movl %edi, %esi
 ; AVX1-NEXT:    movswl %di, %ecx
 ; AVX1-NEXT:    shrq $48, %rdi
-; AVX1-NEXT:    shrq $32, %rsi
-; AVX1-NEXT:    shrl $16, %eax
-; AVX1-NEXT:    cwtl
-; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    shrq $32, %rax
+; AVX1-NEXT:    shrl $16, %esi
+; AVX1-NEXT:    movswl %si, %esi
+; AVX1-NEXT:    vmovd %esi, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm1
 ; AVX1-NEXT:    vmovd %ecx, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm2
-; AVX1-NEXT:    movswl %si, %eax
+; AVX1-NEXT:    cwtl
 ; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm3
 ; AVX1-NEXT:    movswl %di, %eax
@@ -1968,9 +1968,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX1-NEXT:    movswl %r10w, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT:    vmovd %r8d, %xmm5
+; AVX1-NEXT:    vmovd %r9d, %xmm5
 ; AVX1-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX1-NEXT:    movswl %r9w, %eax
+; AVX1-NEXT:    movswl %r8w, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm6
 ; AVX1-NEXT:    vcvtph2ps %xmm6, %xmm6
 ; AVX1-NEXT:    movswl %dx, %eax
@@ -1995,25 +1995,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX2-LABEL: cvt_8i16_to_8f64:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vmovq %xmm0, %rdx
-; AVX2-NEXT:    movq %rdx, %r9
+; AVX2-NEXT:    movq %rdx, %r8
 ; AVX2-NEXT:    movl %edx, %r10d
-; AVX2-NEXT:    movswl %dx, %r8d
+; AVX2-NEXT:    movswl %dx, %r9d
 ; AVX2-NEXT:    shrq $48, %rdx
-; AVX2-NEXT:    shrq $32, %r9
+; AVX2-NEXT:    shrq $32, %r8
 ; AVX2-NEXT:    shrl $16, %r10d
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX2-NEXT:    movq %rdi, %rsi
-; AVX2-NEXT:    movl %edi, %eax
+; AVX2-NEXT:    movq %rdi, %rax
+; AVX2-NEXT:    movl %edi, %esi
 ; AVX2-NEXT:    movswl %di, %ecx
 ; AVX2-NEXT:    shrq $48, %rdi
-; AVX2-NEXT:    shrq $32, %rsi
-; AVX2-NEXT:    shrl $16, %eax
-; AVX2-NEXT:    cwtl
-; AVX2-NEXT:    vmovd %eax, %xmm0
+; AVX2-NEXT:    shrq $32, %rax
+; AVX2-NEXT:    shrl $16, %esi
+; AVX2-NEXT:    movswl %si, %esi
+; AVX2-NEXT:    vmovd %esi, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm1
 ; AVX2-NEXT:    vmovd %ecx, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm2
-; AVX2-NEXT:    movswl %si, %eax
+; AVX2-NEXT:    cwtl
 ; AVX2-NEXT:    vmovd %eax, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm3
 ; AVX2-NEXT:    movswl %di, %eax
@@ -2022,9 +2022,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX2-NEXT:    movswl %r10w, %eax
 ; AVX2-NEXT:    vmovd %eax, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT:    vmovd %r8d, %xmm5
+; AVX2-NEXT:    vmovd %r9d, %xmm5
 ; AVX2-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX2-NEXT:    movswl %r9w, %eax
+; AVX2-NEXT:    movswl %r8w, %eax
 ; AVX2-NEXT:    vmovd %eax, %xmm6
 ; AVX2-NEXT:    vcvtph2ps %xmm6, %xmm6
 ; AVX2-NEXT:    movswl %dx, %eax