mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-22 04:22:57 +02:00
[X86] Improve v2i64->v2f32 and v4i64->v4f32 uint_to_fp on avx and avx2 targets.
Summary: Based on Simon's D52965, but improved to handle strict fp and to improve some of the shuffling. Rather than using v2i1/v4i1 and letting type legalization continue, just generate all the code with legal types and use an explicit shuffle. I also added an explicit setcc to the v4i64 code to match the semantics of vselect, which doesn't just use the sign bit. I'm also using a v4i64->v4i32 truncate instead of the shuffle in Simon's original code. With the setcc this will become a pack. Future work can look into using X86ISD::BLENDV and a different shuffle that only moves the sign bit.
Reviewers: RKSimon, spatel
Reviewed By: RKSimon
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71956
This commit is contained in:
parent
6ec5dfb8d7
commit
19d6875129
@ -1176,6 +1176,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||||||
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
|
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
|
||||||
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
|
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
|
||||||
|
|
||||||
|
if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
|
||||||
|
// We need to mark SINT_TO_FP as Custom even though we want to expand it
|
||||||
|
// so that DAG combine doesn't try to turn it into uint_to_fp.
|
||||||
|
setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
|
||||||
|
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
|
||||||
|
setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
|
||||||
|
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
|
||||||
|
}
|
||||||
|
|
||||||
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
|
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
|
||||||
setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
|
setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
|
||||||
setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
|
setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
|
||||||
@ -18620,42 +18629,91 @@ static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
|
|||||||
|
|
||||||
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
|
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
|
||||||
const X86Subtarget &Subtarget) {
|
const X86Subtarget &Subtarget) {
|
||||||
assert(Subtarget.hasDQI() && !Subtarget.hasVLX() && "Unexpected features");
|
|
||||||
|
|
||||||
SDLoc DL(Op);
|
SDLoc DL(Op);
|
||||||
bool IsStrict = Op->isStrictFPOpcode();
|
bool IsStrict = Op->isStrictFPOpcode();
|
||||||
MVT VT = Op->getSimpleValueType(0);
|
MVT VT = Op->getSimpleValueType(0);
|
||||||
SDValue Src = Op->getOperand(IsStrict ? 1 : 0);
|
SDValue Src = Op->getOperand(IsStrict ? 1 : 0);
|
||||||
assert((Src.getSimpleValueType() == MVT::v2i64 ||
|
|
||||||
Src.getSimpleValueType() == MVT::v4i64) &&
|
|
||||||
"Unsupported custom type");
|
|
||||||
|
|
||||||
// With AVX512DQ, but not VLX we need to widen to get a 512-bit result type.
|
if (Subtarget.hasDQI()) {
|
||||||
assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
|
assert(!Subtarget.hasVLX() && "Unexpected features");
|
||||||
"Unexpected VT!");
|
|
||||||
MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
|
|
||||||
|
|
||||||
// Need to concat with zero vector for strict fp to avoid spurious
|
assert((Src.getSimpleValueType() == MVT::v2i64 ||
|
||||||
// exceptions.
|
Src.getSimpleValueType() == MVT::v4i64) &&
|
||||||
SDValue Tmp =
|
"Unsupported custom type");
|
||||||
IsStrict ? DAG.getConstant(0, DL, MVT::v8i64) : DAG.getUNDEF(MVT::v8i64);
|
|
||||||
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src,
|
// With AVX512DQ, but not VLX we need to widen to get a 512-bit result type.
|
||||||
DAG.getIntPtrConstant(0, DL));
|
assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
|
||||||
SDValue Res, Chain;
|
"Unexpected VT!");
|
||||||
if (IsStrict) {
|
MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
|
||||||
Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other},
|
|
||||||
{Op->getOperand(0), Src});
|
// Need to concat with zero vector for strict fp to avoid spurious
|
||||||
Chain = Res.getValue(1);
|
// exceptions.
|
||||||
} else {
|
SDValue Tmp = IsStrict ? DAG.getConstant(0, DL, MVT::v8i64)
|
||||||
Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src);
|
: DAG.getUNDEF(MVT::v8i64);
|
||||||
|
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src,
|
||||||
|
DAG.getIntPtrConstant(0, DL));
|
||||||
|
SDValue Res, Chain;
|
||||||
|
if (IsStrict) {
|
||||||
|
Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other},
|
||||||
|
{Op->getOperand(0), Src});
|
||||||
|
Chain = Res.getValue(1);
|
||||||
|
} else {
|
||||||
|
Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src);
|
||||||
|
}
|
||||||
|
|
||||||
|
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
|
||||||
|
DAG.getIntPtrConstant(0, DL));
|
||||||
|
|
||||||
|
if (IsStrict)
|
||||||
|
return DAG.getMergeValues({Res, Chain}, DL);
|
||||||
|
return Res;
|
||||||
}
|
}
|
||||||
|
|
||||||
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
|
bool IsSigned = Op->getOpcode() == ISD::SINT_TO_FP ||
|
||||||
DAG.getIntPtrConstant(0, DL));
|
Op->getOpcode() == ISD::STRICT_SINT_TO_FP;
|
||||||
|
if (VT != MVT::v4f32 || IsSigned)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64);
|
||||||
|
SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
|
||||||
|
SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64,
|
||||||
|
DAG.getNode(ISD::SRL, DL, MVT::v4i64, Src, One),
|
||||||
|
DAG.getNode(ISD::AND, DL, MVT::v4i64, Src, One));
|
||||||
|
SDValue IsNeg = DAG.getSetCC(DL, MVT::v4i64, Src, Zero, ISD::SETLT);
|
||||||
|
SDValue SignSrc = DAG.getSelect(DL, MVT::v4i64, IsNeg, Sign, Src);
|
||||||
|
SmallVector<SDValue, 4> SignCvts(4);
|
||||||
|
SmallVector<SDValue, 4> Chains(4);
|
||||||
|
for (int i = 0; i != 4; ++i) {
|
||||||
|
SDValue Src = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc,
|
||||||
|
DAG.getIntPtrConstant(i, DL));
|
||||||
|
if (IsStrict) {
|
||||||
|
SignCvts[i] =
|
||||||
|
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {MVT::f32, MVT::Other},
|
||||||
|
{Op.getOperand(0), Src});
|
||||||
|
Chains[i] = SignCvts[i].getValue(1);
|
||||||
|
} else {
|
||||||
|
SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f32, Src);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts);
|
||||||
|
|
||||||
|
SDValue Slow, Chain;
|
||||||
|
if (IsStrict) {
|
||||||
|
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
|
||||||
|
Slow = DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v4f32, MVT::Other},
|
||||||
|
{Chain, SignCvt, SignCvt});
|
||||||
|
Chain = Slow.getValue(1);
|
||||||
|
} else {
|
||||||
|
Slow = DAG.getNode(ISD::FADD, DL, MVT::v4f32, SignCvt, SignCvt);
|
||||||
|
}
|
||||||
|
|
||||||
|
IsNeg = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i32, IsNeg);
|
||||||
|
SDValue Cvt = DAG.getSelect(DL, MVT::v4f32, IsNeg, Slow, SignCvt);
|
||||||
|
|
||||||
if (IsStrict)
|
if (IsStrict)
|
||||||
return DAG.getMergeValues({Res, Chain}, DL);
|
return DAG.getMergeValues({Cvt, Chain}, DL);
|
||||||
return Res;
|
|
||||||
|
return Cvt;
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||||
@ -29011,6 +29069,49 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() &&
|
||||||
|
Subtarget.hasAVX() && !Subtarget.hasAVX512()) {
|
||||||
|
// TODO Any SSE41+ subtarget should work here but BLENDV codegen ends up
|
||||||
|
// a lot worse than it should be.
|
||||||
|
SDValue Zero = DAG.getConstant(0, dl, SrcVT);
|
||||||
|
SDValue One = DAG.getConstant(1, dl, SrcVT);
|
||||||
|
SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT,
|
||||||
|
DAG.getNode(ISD::SRL, dl, SrcVT, Src, One),
|
||||||
|
DAG.getNode(ISD::AND, dl, SrcVT, Src, One));
|
||||||
|
SDValue IsNeg = DAG.getSetCC(dl, MVT::v2i64, Src, Zero, ISD::SETLT);
|
||||||
|
SDValue SignSrc = DAG.getSelect(dl, SrcVT, IsNeg, Sign, Src);
|
||||||
|
SmallVector<SDValue, 4> SignCvts(4, DAG.getConstantFP(0.0, dl, MVT::f32));
|
||||||
|
for (int i = 0; i != 2; ++i) {
|
||||||
|
SDValue Src = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
|
||||||
|
SignSrc, DAG.getIntPtrConstant(i, dl));
|
||||||
|
if (IsStrict)
|
||||||
|
SignCvts[i] =
|
||||||
|
DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {MVT::f32, MVT::Other},
|
||||||
|
{N->getOperand(0), Src});
|
||||||
|
else
|
||||||
|
SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Src);
|
||||||
|
};
|
||||||
|
SDValue SignCvt = DAG.getBuildVector(MVT::v4f32, dl, SignCvts);
|
||||||
|
SDValue Slow, Chain;
|
||||||
|
if (IsStrict) {
|
||||||
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||||
|
SignCvts[0].getValue(1), SignCvts[1].getValue(1));
|
||||||
|
Slow = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v4f32, MVT::Other},
|
||||||
|
{Chain, SignCvt, SignCvt});
|
||||||
|
Chain = Slow.getValue(1);
|
||||||
|
} else {
|
||||||
|
Slow = DAG.getNode(ISD::FADD, dl, MVT::v4f32, SignCvt, SignCvt);
|
||||||
|
}
|
||||||
|
IsNeg = DAG.getBitcast(MVT::v4i32, IsNeg);
|
||||||
|
IsNeg =
|
||||||
|
DAG.getVectorShuffle(MVT::v4i32, dl, IsNeg, IsNeg, {1, 3, -1, -1});
|
||||||
|
SDValue Cvt = DAG.getSelect(dl, MVT::v4f32, IsNeg, Slow, SignCvt);
|
||||||
|
Results.push_back(Cvt);
|
||||||
|
if (IsStrict)
|
||||||
|
Results.push_back(Chain);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (SrcVT != MVT::v2i32)
|
if (SrcVT != MVT::v2i32)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -311,33 +311,20 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
|
|||||||
;
|
;
|
||||||
; AVX1-64-LABEL: uitofp_v2i64_v2f32:
|
; AVX1-64-LABEL: uitofp_v2i64_v2f32:
|
||||||
; AVX1-64: # %bb.0:
|
; AVX1-64: # %bb.0:
|
||||||
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX1-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
|
||||||
; AVX1-64-NEXT: movq %rax, %rcx
|
; AVX1-64-NEXT: vpsrlq $1, %xmm0, %xmm2
|
||||||
; AVX1-64-NEXT: shrq %rcx
|
; AVX1-64-NEXT: vpor %xmm1, %xmm2, %xmm1
|
||||||
; AVX1-64-NEXT: movl %eax, %edx
|
; AVX1-64-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
|
||||||
; AVX1-64-NEXT: andl $1, %edx
|
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; AVX1-64-NEXT: orq %rcx, %rdx
|
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
||||||
; AVX1-64-NEXT: testq %rax, %rax
|
; AVX1-64-NEXT: vmovq %xmm1, %rax
|
||||||
; AVX1-64-NEXT: cmovnsq %rax, %rdx
|
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
|
||||||
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
|
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
|
||||||
; AVX1-64-NEXT: jns .LBB3_2
|
; AVX1-64-NEXT: vaddps %xmm1, %xmm1, %xmm2
|
||||||
; AVX1-64-NEXT: # %bb.1:
|
; AVX1-64-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||||
; AVX1-64-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; AVX1-64-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
|
||||||
; AVX1-64-NEXT: .LBB3_2:
|
; AVX1-64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
||||||
; AVX1-64-NEXT: vmovq %xmm0, %rax
|
; AVX1-64-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||||
; AVX1-64-NEXT: movq %rax, %rcx
|
|
||||||
; AVX1-64-NEXT: shrq %rcx
|
|
||||||
; AVX1-64-NEXT: movl %eax, %edx
|
|
||||||
; AVX1-64-NEXT: andl $1, %edx
|
|
||||||
; AVX1-64-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX1-64-NEXT: testq %rax, %rax
|
|
||||||
; AVX1-64-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm0
|
|
||||||
; AVX1-64-NEXT: jns .LBB3_4
|
|
||||||
; AVX1-64-NEXT: # %bb.3:
|
|
||||||
; AVX1-64-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX1-64-NEXT: .LBB3_4:
|
|
||||||
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
|
||||||
; AVX1-64-NEXT: retq
|
; AVX1-64-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-64-LABEL: uitofp_v2i64_v2f32:
|
; AVX512F-64-LABEL: uitofp_v2i64_v2f32:
|
||||||
|
@ -1058,123 +1058,59 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
|
|||||||
;
|
;
|
||||||
; AVX1-64-LABEL: uitofp_v4i64_v4f32:
|
; AVX1-64-LABEL: uitofp_v4i64_v4f32:
|
||||||
; AVX1-64: # %bb.0:
|
; AVX1-64: # %bb.0:
|
||||||
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX1-64-NEXT: vpsrlq $1, %xmm0, %xmm1
|
||||||
; AVX1-64-NEXT: movq %rax, %rcx
|
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; AVX1-64-NEXT: shrq %rcx
|
; AVX1-64-NEXT: vpsrlq $1, %xmm2, %xmm3
|
||||||
; AVX1-64-NEXT: movl %eax, %edx
|
; AVX1-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||||
; AVX1-64-NEXT: andl $1, %edx
|
; AVX1-64-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm3
|
||||||
; AVX1-64-NEXT: orq %rcx, %rdx
|
; AVX1-64-NEXT: vorpd %ymm3, %ymm1, %ymm1
|
||||||
; AVX1-64-NEXT: testq %rax, %rax
|
; AVX1-64-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
|
||||||
; AVX1-64-NEXT: cmovnsq %rax, %rdx
|
; AVX1-64-NEXT: vpextrq $1, %xmm3, %rax
|
||||||
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
|
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
|
||||||
; AVX1-64-NEXT: jns .LBB19_2
|
; AVX1-64-NEXT: vmovq %xmm3, %rax
|
||||||
; AVX1-64-NEXT: # %bb.1:
|
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
|
||||||
; AVX1-64-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
|
||||||
; AVX1-64-NEXT: .LBB19_2:
|
; AVX1-64-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||||
; AVX1-64-NEXT: vmovq %xmm0, %rax
|
; AVX1-64-NEXT: vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
|
||||||
; AVX1-64-NEXT: movq %rax, %rcx
|
; AVX1-64-NEXT: vmovq %xmm1, %rax
|
||||||
; AVX1-64-NEXT: shrq %rcx
|
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
|
||||||
; AVX1-64-NEXT: movl %eax, %edx
|
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
|
||||||
; AVX1-64-NEXT: andl $1, %edx
|
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; AVX1-64-NEXT: orq %rcx, %rdx
|
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
|
||||||
; AVX1-64-NEXT: testq %rax, %rax
|
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
|
||||||
; AVX1-64-NEXT: cmovnsq %rax, %rdx
|
; AVX1-64-NEXT: vaddps %xmm1, %xmm1, %xmm3
|
||||||
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
|
; AVX1-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||||
; AVX1-64-NEXT: jns .LBB19_4
|
; AVX1-64-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
|
||||||
; AVX1-64-NEXT: # %bb.3:
|
; AVX1-64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
||||||
; AVX1-64-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
; AVX1-64-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
|
||||||
; AVX1-64-NEXT: .LBB19_4:
|
|
||||||
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
||||||
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
||||||
; AVX1-64-NEXT: vmovq %xmm0, %rax
|
|
||||||
; AVX1-64-NEXT: movq %rax, %rcx
|
|
||||||
; AVX1-64-NEXT: shrq %rcx
|
|
||||||
; AVX1-64-NEXT: movl %eax, %edx
|
|
||||||
; AVX1-64-NEXT: andl $1, %edx
|
|
||||||
; AVX1-64-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX1-64-NEXT: testq %rax, %rax
|
|
||||||
; AVX1-64-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm2
|
|
||||||
; AVX1-64-NEXT: jns .LBB19_6
|
|
||||||
; AVX1-64-NEXT: # %bb.5:
|
|
||||||
; AVX1-64-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX1-64-NEXT: .LBB19_6:
|
|
||||||
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
|
||||||
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
|
|
||||||
; AVX1-64-NEXT: movq %rax, %rcx
|
|
||||||
; AVX1-64-NEXT: shrq %rcx
|
|
||||||
; AVX1-64-NEXT: movl %eax, %edx
|
|
||||||
; AVX1-64-NEXT: andl $1, %edx
|
|
||||||
; AVX1-64-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX1-64-NEXT: testq %rax, %rax
|
|
||||||
; AVX1-64-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
|
|
||||||
; AVX1-64-NEXT: jns .LBB19_8
|
|
||||||
; AVX1-64-NEXT: # %bb.7:
|
|
||||||
; AVX1-64-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX1-64-NEXT: .LBB19_8:
|
|
||||||
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
||||||
; AVX1-64-NEXT: vzeroupper
|
; AVX1-64-NEXT: vzeroupper
|
||||||
; AVX1-64-NEXT: retq
|
; AVX1-64-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-64-LABEL: uitofp_v4i64_v4f32:
|
; AVX2-64-LABEL: uitofp_v4i64_v4f32:
|
||||||
; AVX2-64: # %bb.0:
|
; AVX2-64: # %bb.0:
|
||||||
|
; AVX2-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX2-64-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
|
||||||
|
; AVX2-64-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||||
|
; AVX2-64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
||||||
|
; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
|
||||||
|
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm2
|
||||||
|
; AVX2-64-NEXT: vpsrlq $1, %ymm0, %ymm3
|
||||||
|
; AVX2-64-NEXT: vpor %ymm2, %ymm3, %ymm2
|
||||||
|
; AVX2-64-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; AVX2-64-NEXT: movq %rax, %rcx
|
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
|
||||||
; AVX2-64-NEXT: shrq %rcx
|
|
||||||
; AVX2-64-NEXT: movl %eax, %edx
|
|
||||||
; AVX2-64-NEXT: andl $1, %edx
|
|
||||||
; AVX2-64-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX2-64-NEXT: testq %rax, %rax
|
|
||||||
; AVX2-64-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX2-64-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
|
|
||||||
; AVX2-64-NEXT: jns .LBB19_2
|
|
||||||
; AVX2-64-NEXT: # %bb.1:
|
|
||||||
; AVX2-64-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
|
||||||
; AVX2-64-NEXT: .LBB19_2:
|
|
||||||
; AVX2-64-NEXT: vmovq %xmm0, %rax
|
; AVX2-64-NEXT: vmovq %xmm0, %rax
|
||||||
; AVX2-64-NEXT: movq %rax, %rcx
|
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
|
||||||
; AVX2-64-NEXT: shrq %rcx
|
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
|
||||||
; AVX2-64-NEXT: movl %eax, %edx
|
|
||||||
; AVX2-64-NEXT: andl $1, %edx
|
|
||||||
; AVX2-64-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX2-64-NEXT: testq %rax, %rax
|
|
||||||
; AVX2-64-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX2-64-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
|
|
||||||
; AVX2-64-NEXT: jns .LBB19_4
|
|
||||||
; AVX2-64-NEXT: # %bb.3:
|
|
||||||
; AVX2-64-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX2-64-NEXT: .LBB19_4:
|
|
||||||
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
||||||
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm0
|
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||||
; AVX2-64-NEXT: vmovq %xmm0, %rax
|
; AVX2-64-NEXT: vmovq %xmm0, %rax
|
||||||
; AVX2-64-NEXT: movq %rax, %rcx
|
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
|
||||||
; AVX2-64-NEXT: shrq %rcx
|
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
|
||||||
; AVX2-64-NEXT: movl %eax, %edx
|
|
||||||
; AVX2-64-NEXT: andl $1, %edx
|
|
||||||
; AVX2-64-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX2-64-NEXT: testq %rax, %rax
|
|
||||||
; AVX2-64-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX2-64-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm2
|
|
||||||
; AVX2-64-NEXT: jns .LBB19_6
|
|
||||||
; AVX2-64-NEXT: # %bb.5:
|
|
||||||
; AVX2-64-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX2-64-NEXT: .LBB19_6:
|
|
||||||
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
|
||||||
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; AVX2-64-NEXT: movq %rax, %rcx
|
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
|
||||||
; AVX2-64-NEXT: shrq %rcx
|
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
|
||||||
; AVX2-64-NEXT: movl %eax, %edx
|
; AVX2-64-NEXT: vaddps %xmm0, %xmm0, %xmm2
|
||||||
; AVX2-64-NEXT: andl $1, %edx
|
; AVX2-64-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
|
||||||
; AVX2-64-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX2-64-NEXT: testq %rax, %rax
|
|
||||||
; AVX2-64-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX2-64-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
|
|
||||||
; AVX2-64-NEXT: jns .LBB19_8
|
|
||||||
; AVX2-64-NEXT: # %bb.7:
|
|
||||||
; AVX2-64-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-64-NEXT: .LBB19_8:
|
|
||||||
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
||||||
; AVX2-64-NEXT: vzeroupper
|
; AVX2-64-NEXT: vzeroupper
|
||||||
; AVX2-64-NEXT: retq
|
; AVX2-64-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1924,35 +1924,20 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
|
|||||||
;
|
;
|
||||||
; VEX-LABEL: uitofp_2i64_to_4f32:
|
; VEX-LABEL: uitofp_2i64_to_4f32:
|
||||||
; VEX: # %bb.0:
|
; VEX: # %bb.0:
|
||||||
; VEX-NEXT: vpextrq $1, %xmm0, %rax
|
; VEX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2
|
||||||
; VEX-NEXT: js .LBB41_1
|
; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1
|
||||||
; VEX-NEXT: # %bb.2:
|
; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; VEX-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; VEX-NEXT: jmp .LBB41_3
|
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
||||||
; VEX-NEXT: .LBB41_1:
|
; VEX-NEXT: vmovq %xmm1, %rax
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
|
||||||
; VEX-NEXT: shrq %rcx
|
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
|
||||||
; VEX-NEXT: andl $1, %eax
|
; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
|
||||||
; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
||||||
; VEX-NEXT: .LBB41_3:
|
; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||||
; VEX-NEXT: vmovq %xmm0, %rax
|
|
||||||
; VEX-NEXT: testq %rax, %rax
|
|
||||||
; VEX-NEXT: js .LBB41_4
|
|
||||||
; VEX-NEXT: # %bb.5:
|
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
|
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
|
|
||||||
; VEX-NEXT: retq
|
|
||||||
; VEX-NEXT: .LBB41_4:
|
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
|
||||||
; VEX-NEXT: shrq %rcx
|
|
||||||
; VEX-NEXT: andl $1, %eax
|
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
|
|
||||||
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
|
|
||||||
; VEX-NEXT: retq
|
; VEX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: uitofp_2i64_to_4f32:
|
; AVX512F-LABEL: uitofp_2i64_to_4f32:
|
||||||
@ -2071,35 +2056,21 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
|
|||||||
;
|
;
|
||||||
; VEX-LABEL: uitofp_2i64_to_2f32:
|
; VEX-LABEL: uitofp_2i64_to_2f32:
|
||||||
; VEX: # %bb.0:
|
; VEX: # %bb.0:
|
||||||
; VEX-NEXT: vmovq %xmm0, %rax
|
; VEX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2
|
||||||
; VEX-NEXT: js .LBB42_1
|
; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1
|
||||||
; VEX-NEXT: # %bb.2:
|
; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; VEX-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; VEX-NEXT: jmp .LBB42_3
|
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
||||||
; VEX-NEXT: .LBB42_1:
|
; VEX-NEXT: vmovq %xmm1, %rax
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
|
||||||
; VEX-NEXT: shrq %rcx
|
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
|
||||||
; VEX-NEXT: andl $1, %eax
|
; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
|
||||||
; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
||||||
; VEX-NEXT: .LBB42_3:
|
; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||||
; VEX-NEXT: vpextrq $1, %xmm0, %rax
|
; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
|
||||||
; VEX-NEXT: testq %rax, %rax
|
|
||||||
; VEX-NEXT: js .LBB42_4
|
|
||||||
; VEX-NEXT: # %bb.5:
|
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
|
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
|
|
||||||
; VEX-NEXT: retq
|
|
||||||
; VEX-NEXT: .LBB42_4:
|
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
|
||||||
; VEX-NEXT: shrq %rcx
|
|
||||||
; VEX-NEXT: andl $1, %eax
|
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
|
|
||||||
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
|
|
||||||
; VEX-NEXT: retq
|
; VEX-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: uitofp_2i64_to_2f32:
|
; AVX512F-LABEL: uitofp_2i64_to_2f32:
|
||||||
@ -2211,38 +2182,60 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
|
|||||||
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
|
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
|
; AVX1-LABEL: uitofp_4i64_to_4f32_undef:
|
||||||
; VEX: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; VEX-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm1
|
||||||
; VEX-NEXT: js .LBB43_1
|
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2
|
||||||
; VEX-NEXT: # %bb.2:
|
; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm2
|
||||||
; VEX-NEXT: jmp .LBB43_3
|
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
|
||||||
; VEX-NEXT: .LBB43_1:
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vmovq %xmm2, %rax
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm3
|
||||||
; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
|
||||||
; VEX-NEXT: .LBB43_3:
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; VEX-NEXT: vmovq %xmm0, %rax
|
; AVX1-NEXT: vcvtsi2ss %eax, %xmm4, %xmm1
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
|
||||||
; VEX-NEXT: js .LBB43_4
|
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2
|
||||||
; VEX-NEXT: # %bb.5:
|
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
|
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
|
; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
|
||||||
; VEX-NEXT: retq
|
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||||
; VEX-NEXT: .LBB43_4:
|
; AVX1-NEXT: vzeroupper
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: retq
|
||||||
; VEX-NEXT: shrq %rcx
|
;
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX2-LABEL: uitofp_4i64_to_4f32_undef:
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX2: # %bb.0:
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
|
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||||
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1
|
||||||
; VEX-NEXT: retq
|
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2
|
||||||
|
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
|
||||||
|
; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
|
||||||
|
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
||||||
|
; AVX2-NEXT: vmovq %xmm1, %rax
|
||||||
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
|
||||||
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
|
||||||
|
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||||
|
; AVX2-NEXT: vmovq %xmm1, %rax
|
||||||
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
|
||||||
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
|
||||||
|
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
|
||||||
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
|
||||||
|
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
|
||||||
|
; AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
|
||||||
|
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm3, %ymm0
|
||||||
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
|
||||||
|
; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||||
|
; AVX2-NEXT: vzeroupper
|
||||||
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
|
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
@ -2636,133 +2629,59 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
|
|||||||
;
|
;
|
||||||
; AVX1-LABEL: uitofp_4i64_to_4f32:
|
; AVX1-LABEL: uitofp_4i64_to_4f32:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; AVX1-NEXT: js .LBB49_1
|
; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3
|
||||||
; AVX1-NEXT: # %bb.2:
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm3
|
||||||
; AVX1-NEXT: jmp .LBB49_3
|
; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
|
||||||
; AVX1-NEXT: .LBB49_1:
|
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
|
||||||
; AVX1-NEXT: shrq %rcx
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
|
||||||
; AVX1-NEXT: andl $1, %eax
|
; AVX1-NEXT: vmovq %xmm3, %rax
|
||||||
; AVX1-NEXT: orq %rcx, %rax
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
|
||||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
|
||||||
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||||
; AVX1-NEXT: .LBB49_3:
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
|
||||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
; AVX1-NEXT: vmovq %xmm1, %rax
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
|
||||||
; AVX1-NEXT: js .LBB49_4
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
|
||||||
; AVX1-NEXT: # %bb.5:
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
|
||||||
; AVX1-NEXT: jmp .LBB49_6
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
|
||||||
; AVX1-NEXT: .LBB49_4:
|
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||||
; AVX1-NEXT: shrq %rcx
|
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
|
||||||
; AVX1-NEXT: andl $1, %eax
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: orq %rcx, %rax
|
; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
|
||||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
|
||||||
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX1-NEXT: .LBB49_6:
|
|
||||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
||||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
|
||||||
; AVX1-NEXT: js .LBB49_7
|
|
||||||
; AVX1-NEXT: # %bb.8:
|
|
||||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
|
||||||
; AVX1-NEXT: jmp .LBB49_9
|
|
||||||
; AVX1-NEXT: .LBB49_7:
|
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
|
||||||
; AVX1-NEXT: shrq %rcx
|
|
||||||
; AVX1-NEXT: andl $1, %eax
|
|
||||||
; AVX1-NEXT: orq %rcx, %rax
|
|
||||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
|
||||||
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX1-NEXT: .LBB49_9:
|
|
||||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
|
||||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
|
||||||
; AVX1-NEXT: js .LBB49_10
|
|
||||||
; AVX1-NEXT: # %bb.11:
|
|
||||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
||||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
||||||
; AVX1-NEXT: vzeroupper
|
|
||||||
; AVX1-NEXT: retq
|
|
||||||
; AVX1-NEXT: .LBB49_10:
|
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
|
||||||
; AVX1-NEXT: shrq %rcx
|
|
||||||
; AVX1-NEXT: andl $1, %eax
|
|
||||||
; AVX1-NEXT: orq %rcx, %rax
|
|
||||||
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
||||||
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: uitofp_4i64_to_4f32:
|
; AVX2-LABEL: uitofp_4i64_to_4f32:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
|
||||||
|
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||||
|
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
||||||
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
|
||||||
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm2
|
||||||
|
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3
|
||||||
|
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
|
||||||
|
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; AVX2-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
|
||||||
; AVX2-NEXT: js .LBB49_1
|
|
||||||
; AVX2-NEXT: # %bb.2:
|
|
||||||
; AVX2-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: jmp .LBB49_3
|
|
||||||
; AVX2-NEXT: .LBB49_1:
|
|
||||||
; AVX2-NEXT: movq %rax, %rcx
|
|
||||||
; AVX2-NEXT: shrq %rcx
|
|
||||||
; AVX2-NEXT: andl $1, %eax
|
|
||||||
; AVX2-NEXT: orq %rcx, %rax
|
|
||||||
; AVX2-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: .LBB49_3:
|
|
||||||
; AVX2-NEXT: vmovq %xmm0, %rax
|
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||||
; AVX2-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
|
||||||
; AVX2-NEXT: js .LBB49_4
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
|
||||||
; AVX2-NEXT: # %bb.5:
|
|
||||||
; AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
|
||||||
; AVX2-NEXT: jmp .LBB49_6
|
|
||||||
; AVX2-NEXT: .LBB49_4:
|
|
||||||
; AVX2-NEXT: movq %rax, %rcx
|
|
||||||
; AVX2-NEXT: shrq %rcx
|
|
||||||
; AVX2-NEXT: andl $1, %eax
|
|
||||||
; AVX2-NEXT: orq %rcx, %rax
|
|
||||||
; AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
|
||||||
; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX2-NEXT: .LBB49_6:
|
|
||||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||||
; AVX2-NEXT: vmovq %xmm0, %rax
|
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||||
; AVX2-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
|
||||||
; AVX2-NEXT: js .LBB49_7
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
|
||||||
; AVX2-NEXT: # %bb.8:
|
|
||||||
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
|
||||||
; AVX2-NEXT: jmp .LBB49_9
|
|
||||||
; AVX2-NEXT: .LBB49_7:
|
|
||||||
; AVX2-NEXT: movq %rax, %rcx
|
|
||||||
; AVX2-NEXT: shrq %rcx
|
|
||||||
; AVX2-NEXT: andl $1, %eax
|
|
||||||
; AVX2-NEXT: orq %rcx, %rax
|
|
||||||
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
|
||||||
; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX2-NEXT: .LBB49_9:
|
|
||||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
|
||||||
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; AVX2-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
|
||||||
; AVX2-NEXT: js .LBB49_10
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
|
||||||
; AVX2-NEXT: # %bb.11:
|
; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm2
|
||||||
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
; AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
|
||||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
||||||
; AVX2-NEXT: vzeroupper
|
|
||||||
; AVX2-NEXT: retq
|
|
||||||
; AVX2-NEXT: .LBB49_10:
|
|
||||||
; AVX2-NEXT: movq %rax, %rcx
|
|
||||||
; AVX2-NEXT: shrq %rcx
|
|
||||||
; AVX2-NEXT: andl $1, %eax
|
|
||||||
; AVX2-NEXT: orq %rcx, %rax
|
|
||||||
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
||||||
; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
@ -4649,70 +4568,66 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
|
|||||||
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
|
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; VEX-LABEL: uitofp_load_4i64_to_4f32:
|
; AVX1-LABEL: uitofp_load_4i64_to_4f32:
|
||||||
; VEX: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; VEX-NEXT: vmovdqa (%rdi), %xmm2
|
; AVX1-NEXT: vmovapd (%rdi), %ymm0
|
||||||
; VEX-NEXT: vmovaps 16(%rdi), %xmm0
|
; AVX1-NEXT: vmovdqa (%rdi), %xmm1
|
||||||
; VEX-NEXT: vpextrq $1, %xmm2, %rax
|
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm3
|
||||||
; VEX-NEXT: js .LBB83_1
|
; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm4
|
||||||
; VEX-NEXT: # %bb.2:
|
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
|
||||||
; VEX-NEXT: jmp .LBB83_3
|
; AVX1-NEXT: vorpd %ymm0, %ymm3, %ymm0
|
||||||
; VEX-NEXT: .LBB83_1:
|
; AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm1, %xmm3
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX1-NEXT: vmovq %xmm3, %rax
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
|
||||||
; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||||
; VEX-NEXT: .LBB83_3:
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm2, %xmm0
|
||||||
; VEX-NEXT: vmovq %xmm2, %rax
|
; AVX1-NEXT: vmovq %xmm0, %rax
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
|
||||||
; VEX-NEXT: js .LBB83_4
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
|
||||||
; VEX-NEXT: # %bb.5:
|
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0
|
||||||
; VEX-NEXT: jmp .LBB83_6
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
|
||||||
; VEX-NEXT: .LBB83_4:
|
; AVX1-NEXT: vaddps %xmm0, %xmm0, %xmm3
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX1-NEXT: vblendvps %xmm1, %xmm3, %xmm0, %xmm0
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
; AVX1-NEXT: vzeroupper
|
||||||
; VEX-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
; AVX1-NEXT: retq
|
||||||
; VEX-NEXT: .LBB83_6:
|
;
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
; AVX2-LABEL: uitofp_load_4i64_to_4f32:
|
||||||
; VEX-NEXT: vmovq %xmm0, %rax
|
; AVX2: # %bb.0:
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
|
||||||
; VEX-NEXT: js .LBB83_7
|
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; VEX-NEXT: # %bb.8:
|
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||||
; VEX-NEXT: jmp .LBB83_9
|
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
||||||
; VEX-NEXT: .LBB83_7:
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm2
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; VEX-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
|
||||||
; VEX-NEXT: .LBB83_9:
|
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
|
||||||
; VEX-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||||
; VEX-NEXT: js .LBB83_10
|
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||||
; VEX-NEXT: # %bb.11:
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; VEX-NEXT: retq
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
|
||||||
; VEX-NEXT: .LBB83_10:
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm2
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX2-NEXT: vzeroupper
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX2-NEXT: retq
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
|
|
||||||
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
||||||
; VEX-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: uitofp_load_4i64_to_4f32:
|
; AVX512F-LABEL: uitofp_load_4i64_to_4f32:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
@ -5168,132 +5083,113 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
|
|||||||
; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
|
; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; VEX-LABEL: uitofp_load_8i64_to_8f32:
|
; AVX1-LABEL: uitofp_load_8i64_to_8f32:
|
||||||
; VEX: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; VEX-NEXT: vmovdqa (%rdi), %xmm1
|
; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [1,1,1,1]
|
||||||
; VEX-NEXT: vmovaps 16(%rdi), %xmm0
|
; AVX1-NEXT: vandpd 32(%rdi), %ymm2, %ymm3
|
||||||
; VEX-NEXT: vmovdqa 32(%rdi), %xmm4
|
; AVX1-NEXT: vmovaps (%rdi), %xmm0
|
||||||
; VEX-NEXT: vmovdqa 48(%rdi), %xmm3
|
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
|
||||||
; VEX-NEXT: vpextrq $1, %xmm4, %rax
|
; AVX1-NEXT: vmovdqa 32(%rdi), %xmm4
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vmovdqa 48(%rdi), %xmm5
|
||||||
; VEX-NEXT: js .LBB87_1
|
; AVX1-NEXT: vpsrlq $1, %xmm4, %xmm6
|
||||||
; VEX-NEXT: # %bb.2:
|
; AVX1-NEXT: vpsrlq $1, %xmm5, %xmm7
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm6, %ymm6
|
||||||
; VEX-NEXT: jmp .LBB87_3
|
; AVX1-NEXT: vorpd %ymm3, %ymm6, %ymm3
|
||||||
; VEX-NEXT: .LBB87_1:
|
; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm4, %xmm6
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vpextrq $1, %xmm6, %rax
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm7
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX1-NEXT: vmovq %xmm6, %rax
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm6
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[2,3]
|
||||||
; VEX-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
|
||||||
; VEX-NEXT: .LBB87_3:
|
; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm5, %xmm3
|
||||||
; VEX-NEXT: vmovq %xmm4, %rax
|
; AVX1-NEXT: vmovq %xmm3, %rax
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm7
|
||||||
; VEX-NEXT: js .LBB87_4
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0,1],xmm7[0],xmm6[3]
|
||||||
; VEX-NEXT: # %bb.5:
|
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm5
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm3
|
||||||
; VEX-NEXT: jmp .LBB87_6
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm6[0,1,2],xmm3[0]
|
||||||
; VEX-NEXT: .LBB87_4:
|
; AVX1-NEXT: vaddps %xmm3, %xmm3, %xmm6
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vxorps %xmm7, %xmm7, %xmm7
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm7, %xmm5
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX1-NEXT: vpackssdw %xmm5, %xmm4, %xmm4
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX1-NEXT: vblendvps %xmm4, %xmm6, %xmm3, %xmm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
|
; AVX1-NEXT: vandpd (%rdi), %ymm2, %ymm2
|
||||||
; VEX-NEXT: vaddss %xmm4, %xmm4, %xmm5
|
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm4
|
||||||
; VEX-NEXT: .LBB87_6:
|
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm5
|
||||||
; VEX-NEXT: vmovq %xmm3, %rax
|
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vorpd %ymm2, %ymm4, %ymm2
|
||||||
; VEX-NEXT: js .LBB87_7
|
; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm0, %xmm4
|
||||||
; VEX-NEXT: # %bb.8:
|
; AVX1-NEXT: vpextrq $1, %xmm4, %rax
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm6, %xmm4
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm5
|
||||||
; VEX-NEXT: jmp .LBB87_9
|
; AVX1-NEXT: vmovq %xmm4, %rax
|
||||||
; VEX-NEXT: .LBB87_7:
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm4
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX1-NEXT: vblendvpd %xmm1, %xmm2, %xmm1, %xmm2
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX1-NEXT: vmovq %xmm2, %rax
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm6, %xmm4
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm5
|
||||||
; VEX-NEXT: vaddss %xmm4, %xmm4, %xmm4
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3]
|
||||||
; VEX-NEXT: .LBB87_9:
|
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
|
||||||
; VEX-NEXT: vpextrq $1, %xmm3, %rax
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm8, %xmm2
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0]
|
||||||
; VEX-NEXT: js .LBB87_10
|
; AVX1-NEXT: vaddps %xmm2, %xmm2, %xmm4
|
||||||
; VEX-NEXT: # %bb.11:
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm7, %xmm1
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3
|
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||||
; VEX-NEXT: jmp .LBB87_12
|
; AVX1-NEXT: vblendvps %xmm0, %xmm4, %xmm2, %xmm0
|
||||||
; VEX-NEXT: .LBB87_10:
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: retq
|
||||||
; VEX-NEXT: shrq %rcx
|
;
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX2-LABEL: uitofp_load_8i64_to_8f32:
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX2: # %bb.0:
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3
|
; AVX2-NEXT: vmovaps (%rdi), %ymm0
|
||||||
; VEX-NEXT: vaddss %xmm3, %xmm3, %xmm3
|
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
|
||||||
; VEX-NEXT: .LBB87_12:
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||||
; VEX-NEXT: vpextrq $1, %xmm1, %rax
|
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
|
||||||
; VEX-NEXT: js .LBB87_13
|
; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
|
||||||
; VEX-NEXT: # %bb.14:
|
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [1,1,1,1]
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm6, %xmm6
|
; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm5
|
||||||
; VEX-NEXT: jmp .LBB87_15
|
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm6
|
||||||
; VEX-NEXT: .LBB87_13:
|
; AVX2-NEXT: vpor %ymm5, %ymm6, %ymm5
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX2-NEXT: vblendvpd %ymm1, %ymm5, %ymm1, %ymm1
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm5
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX2-NEXT: vmovq %xmm1, %rax
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm6, %xmm6
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6
|
||||||
; VEX-NEXT: vaddss %xmm6, %xmm6, %xmm6
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[2,3]
|
||||||
; VEX-NEXT: .LBB87_15:
|
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[2,3]
|
; AVX2-NEXT: vmovq %xmm1, %rax
|
||||||
; VEX-NEXT: vmovq %xmm1, %rax
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm6[0],xmm5[3]
|
||||||
; VEX-NEXT: js .LBB87_16
|
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; VEX-NEXT: # %bb.17:
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm1
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm7, %xmm1
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm5[0,1,2],xmm1[0]
|
||||||
; VEX-NEXT: jmp .LBB87_18
|
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm5
|
||||||
; VEX-NEXT: .LBB87_16:
|
; AVX2-NEXT: vblendvps %xmm3, %xmm5, %xmm1, %xmm1
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm2
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm7, %xmm1
|
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm4
|
||||||
; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3
|
||||||
; VEX-NEXT: .LBB87_18:
|
; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm5 = xmm1[0],xmm6[0],xmm1[2,3]
|
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm4[0],xmm2[3]
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm3
|
||||||
; VEX-NEXT: vmovq %xmm0, %rax
|
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||||
; VEX-NEXT: testq %rax, %rax
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm4
|
||||||
; VEX-NEXT: js .LBB87_19
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
|
||||||
; VEX-NEXT: # %bb.20:
|
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm7, %xmm2
|
; AVX2-NEXT: vmovq %xmm0, %rax
|
||||||
; VEX-NEXT: jmp .LBB87_21
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm4
|
||||||
; VEX-NEXT: .LBB87_19:
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
|
||||||
; VEX-NEXT: shrq %rcx
|
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm0
|
||||||
; VEX-NEXT: andl $1, %eax
|
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm3
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm7, %xmm2
|
; AVX2-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
|
||||||
; VEX-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; VEX-NEXT: .LBB87_21:
|
; AVX2-NEXT: retq
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm2 = xmm5[0,1],xmm2[0],xmm5[3]
|
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0]
|
|
||||||
; VEX-NEXT: vpextrq $1, %xmm0, %rax
|
|
||||||
; VEX-NEXT: testq %rax, %rax
|
|
||||||
; VEX-NEXT: js .LBB87_22
|
|
||||||
; VEX-NEXT: # %bb.23:
|
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm7, %xmm0
|
|
||||||
; VEX-NEXT: jmp .LBB87_24
|
|
||||||
; VEX-NEXT: .LBB87_22:
|
|
||||||
; VEX-NEXT: movq %rax, %rcx
|
|
||||||
; VEX-NEXT: shrq %rcx
|
|
||||||
; VEX-NEXT: andl $1, %eax
|
|
||||||
; VEX-NEXT: orq %rcx, %rax
|
|
||||||
; VEX-NEXT: vcvtsi2ss %rax, %xmm7, %xmm0
|
|
||||||
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; VEX-NEXT: .LBB87_24:
|
|
||||||
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
|
|
||||||
; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
|
||||||
; VEX-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
|
; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
|
@ -6941,33 +6941,20 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
|
|||||||
;
|
;
|
||||||
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64:
|
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64:
|
||||||
; AVX1: # %bb.0: # %entry
|
; AVX1: # %bb.0: # %entry
|
||||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2
|
||||||
; AVX1-NEXT: shrq %rcx
|
; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
|
||||||
; AVX1-NEXT: movl %eax, %edx
|
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
|
||||||
; AVX1-NEXT: andl $1, %edx
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; AVX1-NEXT: orq %rcx, %rdx
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vmovq %xmm1, %rax
|
||||||
; AVX1-NEXT: cmovnsq %rax, %rdx
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
|
||||||
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
|
||||||
; AVX1-NEXT: jns .LBB174_2
|
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2
|
||||||
; AVX1-NEXT: # %bb.1:
|
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||||
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
|
||||||
; AVX1-NEXT: .LBB174_2: # %entry
|
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
|
||||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
|
||||||
; AVX1-NEXT: shrq %rcx
|
|
||||||
; AVX1-NEXT: movl %eax, %edx
|
|
||||||
; AVX1-NEXT: andl $1, %edx
|
|
||||||
; AVX1-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
|
||||||
; AVX1-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm0
|
|
||||||
; AVX1-NEXT: jns .LBB174_4
|
|
||||||
; AVX1-NEXT: # %bb.3:
|
|
||||||
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: .LBB174_4: # %entry
|
|
||||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64:
|
; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64:
|
||||||
@ -7471,62 +7458,31 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
|
|||||||
;
|
;
|
||||||
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
|
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
|
||||||
; AVX1: # %bb.0: # %entry
|
; AVX1: # %bb.0: # %entry
|
||||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; AVX1-NEXT: shrq %rcx
|
; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3
|
||||||
; AVX1-NEXT: movl %eax, %edx
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||||
; AVX1-NEXT: andl $1, %edx
|
; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm3
|
||||||
; AVX1-NEXT: orq %rcx, %rdx
|
; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm3
|
||||||
; AVX1-NEXT: cmovnsq %rax, %rdx
|
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
|
||||||
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
|
||||||
; AVX1-NEXT: jns .LBB182_2
|
; AVX1-NEXT: vmovq %xmm3, %rax
|
||||||
; AVX1-NEXT: # %bb.1:
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
|
||||||
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3]
|
||||||
; AVX1-NEXT: .LBB182_2: # %entry
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm2, %xmm1
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
; AVX1-NEXT: vmovq %xmm1, %rax
|
||||||
; AVX1-NEXT: shrq %rcx
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
|
||||||
; AVX1-NEXT: movl %eax, %edx
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
|
||||||
; AVX1-NEXT: andl $1, %edx
|
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
|
||||||
; AVX1-NEXT: orq %rcx, %rdx
|
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
|
||||||
; AVX1-NEXT: cmovnsq %rax, %rdx
|
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
|
||||||
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
|
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
|
||||||
; AVX1-NEXT: jns .LBB182_4
|
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
|
||||||
; AVX1-NEXT: # %bb.3:
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0
|
||||||
; AVX1-NEXT: .LBB182_4: # %entry
|
|
||||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
|
||||||
; AVX1-NEXT: vmovq %xmm0, %rax
|
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
|
||||||
; AVX1-NEXT: shrq %rcx
|
|
||||||
; AVX1-NEXT: movl %eax, %edx
|
|
||||||
; AVX1-NEXT: andl $1, %edx
|
|
||||||
; AVX1-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
|
||||||
; AVX1-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm2
|
|
||||||
; AVX1-NEXT: jns .LBB182_6
|
|
||||||
; AVX1-NEXT: # %bb.5:
|
|
||||||
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX1-NEXT: .LBB182_6: # %entry
|
|
||||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
|
|
||||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
|
||||||
; AVX1-NEXT: movq %rax, %rcx
|
|
||||||
; AVX1-NEXT: shrq %rcx
|
|
||||||
; AVX1-NEXT: movl %eax, %edx
|
|
||||||
; AVX1-NEXT: andl $1, %edx
|
|
||||||
; AVX1-NEXT: orq %rcx, %rdx
|
|
||||||
; AVX1-NEXT: testq %rax, %rax
|
|
||||||
; AVX1-NEXT: cmovnsq %rax, %rdx
|
|
||||||
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
|
|
||||||
; AVX1-NEXT: jns .LBB182_8
|
|
||||||
; AVX1-NEXT: # %bb.7:
|
|
||||||
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: .LBB182_8: # %entry
|
|
||||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
|
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
|
Loading…
Reference in New Issue
Block a user