Mirror of https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 12:41:49 +01:00)
[X86][SSE] Simplify MOVMSK patterns based on comparison
An initial patch adding combineSetCCMOVMSK to simplify MOVMSK and its vector input based on the comparison of the MOVMSK result.

This first stage just adds support for some simple MOVMSK(PACKSSBW()) cases where we remove the PACKSS if we're comparing ne/eq zero (any_of patterns), allowing us to compare directly against the v8i16 source vector(s) bitcasted to v16i8, with suitable masking to take into account which sign bits are valid.

Future combines could peek through further PACKSS nodes and target shuffles, handle all_of patterns (ne/eq -1), optimize to a PTEST op, etc.

Differential Revision: https://reviews.llvm.org/D81171
This commit is contained in:
parent e09a3db448
commit 52416b557e
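Editorial note, not part of the commit: the transform relies on a small bit-level identity. PACKSSWB saturates each 16-bit lane to a byte but preserves its sign, so the low 8 bits of PMOVMSKB(PACKSSWB(X, undef)) are exactly the sign bits of the eight words of X. PMOVMSKB on the same words bitcast to v16i8 puts those word sign bits at the odd positions (1, 3, ..., 15) and the low-byte sign bits at the even positions. For an eq/ne-zero (any_of) test the two forms are therefore interchangeable once the byte mask is ANDed with 0xAAAA, which is the mask the combine emits. The standalone C++ sketch below models both masks with plain scalar code and checks the equivalence over a large random sample; the helper names are illustrative only and are not LLVM APIs.

#include <cstdint>
#include <cstdio>
#include <random>

// Low 8 bits of PMOVMSKB(PACKSSWB(x, undef)): bit i is the sign of the
// signed-saturated byte produced from word x[i]; saturation preserves the
// sign, so this is simply the sign bit of x[i].
static unsigned packedSignMask(const int16_t x[8]) {
  unsigned mask = 0;
  for (int i = 0; i < 8; ++i) {
    int v = x[i];
    int sat = v > 127 ? 127 : (v < -128 ? -128 : v);
    if (sat < 0)
      mask |= 1u << i;
  }
  return mask;
}

// PMOVMSKB over the same words reinterpreted as 16 bytes (little endian):
// bit 2*i+1 is the sign of word x[i], bit 2*i the sign of its low byte.
static unsigned byteSignMask(const int16_t x[8]) {
  unsigned mask = 0;
  for (int i = 0; i < 8; ++i) {
    uint16_t bits = (uint16_t)x[i];
    if (bits & 0x0080) mask |= 1u << (2 * i);
    if (bits & 0x8000) mask |= 1u << (2 * i + 1);
  }
  return mask;
}

int main() {
  std::mt19937 rng(42);
  for (int n = 0; n < 100000; ++n) {
    int16_t x[8];
    for (int16_t &v : x)
      v = (int16_t)(uint16_t)rng(); // wrap the random bits into a word
    // any_of(sign bits) computed the old way (pack, then test the byte mask)
    // and the new way (byte mask of the unpacked words, ANDed with 0xAAAA to
    // keep only the word-sign positions) must agree.
    bool viaPack = packedSignMask(x) != 0;
    bool viaMask = (byteSignMask(x) & 0xAAAA) != 0;
    if (viaPack != viaMask) {
      std::printf("mismatch\n");
      return 1;
    }
  }
  std::printf("equivalence holds on all sampled inputs\n");
  return 0;
}

The same argument, widened to sixteen words and a 0xAAAAAAAA mask, covers the AVX2 PACKSS(LO(X), HI(X)) case handled by the second pattern in the patch.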
@@ -40227,6 +40227,81 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
   return SDValue();
 }
 
+// Attempt to simplify the MOVMSK input based on the comparison type.
+static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
+                                  SelectionDAG &DAG,
+                                  const X86Subtarget &Subtarget) {
+  // Only handle eq/ne against zero (any_of).
+  // TODO: Handle eq/ne against -1 (all_of) as well.
+  if (!(CC == X86::COND_E || CC == X86::COND_NE))
+    return SDValue();
+  if (EFLAGS.getValueType() != MVT::i32)
+    return SDValue();
+  unsigned CmpOpcode = EFLAGS.getOpcode();
+  if (CmpOpcode != X86ISD::CMP || !isNullConstant(EFLAGS.getOperand(1)))
+    return SDValue();
+
+  SDValue CmpOp = EFLAGS.getOperand(0);
+  unsigned CmpBits = CmpOp.getValueSizeInBits();
+
+  // Peek through any truncate.
+  if (CmpOp.getOpcode() == ISD::TRUNCATE)
+    CmpOp = CmpOp.getOperand(0);
+
+  // Bail if we don't find a MOVMSK.
+  if (CmpOp.getOpcode() != X86ISD::MOVMSK)
+    return SDValue();
+
+  SDValue Vec = CmpOp.getOperand(0);
+  MVT VecVT = Vec.getSimpleValueType();
+  assert((VecVT.is128BitVector() || VecVT.is256BitVector()) &&
+         "Unexpected MOVMSK operand");
+
+  // See if we can avoid a PACKSS by calling MOVMSK on the sources.
+  // For vXi16 cases we can use a v2Xi8 PMOVMSKB. We must mask out
+  // sign bits prior to the comparison with zero unless we know that
+  // the vXi16 splats the sign bit down to the lower i8 half.
+  if (Vec.getOpcode() == X86ISD::PACKSS && VecVT == MVT::v16i8) {
+    SDValue VecOp0 = Vec.getOperand(0);
+    SDValue VecOp1 = Vec.getOperand(1);
+    bool SignExt0 = DAG.ComputeNumSignBits(VecOp0) > 8;
+    bool SignExt1 = DAG.ComputeNumSignBits(VecOp1) > 8;
+    // PMOVMSKB(PACKSSBW(X, undef)) -> PMOVMSKB(BITCAST_v16i8(X)) & 0xAAAA.
+    if (CmpBits == 8 && VecOp1.isUndef()) {
+      SDLoc DL(EFLAGS);
+      SDValue Result = DAG.getBitcast(MVT::v16i8, VecOp0);
+      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+      Result = DAG.getZExtOrTrunc(Result, DL, MVT::i16);
+      if (!SignExt0) {
+        Result = DAG.getNode(ISD::AND, DL, MVT::i16, Result,
+                             DAG.getConstant(0xAAAA, DL, MVT::i16));
+      }
+      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
+                         DAG.getConstant(0, DL, MVT::i16));
+    }
+    // PMOVMSKB(PACKSSBW(LO(X), HI(X)))
+    // -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA.
+    if (CmpBits == 16 && Subtarget.hasInt256() &&
+        VecOp0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        VecOp1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        VecOp0.getOperand(0) == VecOp1.getOperand(0) &&
+        VecOp0.getConstantOperandAPInt(1) == 0 &&
+        VecOp1.getConstantOperandAPInt(1) == 8) {
+      SDLoc DL(EFLAGS);
+      SDValue Result = DAG.getBitcast(MVT::v32i8, VecOp0.getOperand(0));
+      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+      if (!SignExt0 || !SignExt1) {
+        Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
+                             DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
+      }
+      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
+                         DAG.getConstant(0, DL, MVT::i32));
+    }
+  }
+
+  return SDValue();
+}
+
 /// Optimize an EFLAGS definition used according to the condition code \p CC
 /// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
 /// uses of chain values.
@@ -40243,6 +40318,9 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
   if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
+    return R;
+
   return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
 }
 
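A note on the test changes that follow (editorial, not part of the commit): whether the new code emits a TEST against 0xAAAA / 0xAAAAAAAA or a plain testw %ax, %ax / testl %eax, %eax is decided by the SignExt0/SignExt1 checks above. When ComputeNumSignBits reports more than 8 sign bits per word, for example when the PACKSS input is a compare result whose lanes are all-zeros or all-ones, both bytes of each word carry the same sign, so masking out the even bit positions is redundant. A minimal standalone C++ model of that shortcut (the helper name is illustrative, not an LLVM API):

#include <cassert>
#include <cstdint>

// PMOVMSKB over a v8i16 reinterpreted as v16i8 (little endian).
static unsigned byteSignMask(const int16_t w[8]) {
  unsigned mask = 0;
  for (int i = 0; i < 8; ++i) {
    if (w[i] & 0x0080) mask |= 1u << (2 * i);
    if (w[i] & 0x8000) mask |= 1u << (2 * i + 1);
  }
  return mask;
}

int main() {
  // Enumerate all 2^8 vectors of "bool" words (0 or -1), the shape produced
  // by PCMPGT/PCMPEQ-style inputs where ComputeNumSignBits(w) > 8.
  for (unsigned bits = 0; bits < 256; ++bits) {
    int16_t w[8];
    for (int i = 0; i < 8; ++i)
      w[i] = (bits >> i) & 1 ? -1 : 0;
    unsigned mask = byteSignMask(w);
    // With sign-splatted lanes the 0xAAAA mask is redundant: both tests
    // agree, so the combine can emit testw/testl on the raw mask instead.
    assert((mask != 0) == ((mask & 0xAAAA) != 0));
  }
  return 0;
}

This is why the compare-fed tests below (the bool_reduction functions, for instance) lose the PACKSS and keep an unmasked testw/testl, while the truncation and shift-based tests keep the 0xAAAA or 0xAAAAAAAA immediate.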
@@ -359,17 +359,15 @@ define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
 define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
 ; SSE2-LABEL: allzeros_v8i16_sign:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: allzeros_v8i16_sign:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
 ;
@@ -471,10 +469,8 @@ define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
 ;
 ; AVX2-LABEL: allzeros_v16i16_sign:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -764,9 +760,8 @@ define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
 ; SSE2-LABEL: allzeros_v8i32_sign:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
@@ -894,10 +889,8 @@ define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -1075,9 +1068,8 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
 ; SSE2-NEXT: packssdw %xmm3, %xmm2
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-NEXT: packssdw %xmm2, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
@@ -1536,18 +1528,16 @@ define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
 ; SSE2-LABEL: allzeros_v8i16_and1:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: psllw $15, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: allzeros_v8i16_and1:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
 ;
@@ -1822,10 +1812,8 @@ define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
 ; AVX2-LABEL: allzeros_v16i16_and1:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -2000,9 +1988,8 @@ define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
 ; SSE2-NEXT: pslld $31, %xmm1
 ; SSE2-NEXT: pslld $31, %xmm0
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
@@ -2161,10 +2148,8 @@ define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -2470,9 +2455,8 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
 ; SSE2-NEXT: psllq $63, %xmm0
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-NEXT: packssdw %xmm2, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
@@ -2937,18 +2921,16 @@ define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
 ; SSE2-LABEL: allzeros_v8i16_and4:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: psllw $13, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
 ; AVX-LABEL: allzeros_v8i16_and4:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpsllw $13, %xmm0, %xmm0
-; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; AVX-NEXT: sete %al
 ; AVX-NEXT: retq
 ;
@@ -3223,10 +3205,8 @@ define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
 ; AVX2-LABEL: allzeros_v16i16_and4:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -3401,9 +3381,8 @@ define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
 ; SSE2-NEXT: pslld $29, %xmm1
 ; SSE2-NEXT: pslld $29, %xmm0
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
@@ -3562,10 +3541,8 @@ define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX2-NEXT: sete %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -3871,9 +3848,8 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
 ; SSE2-NEXT: psllq $61, %xmm0
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-NEXT: packssdw %xmm2, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: sete %al
 ; SSE2-NEXT: retq
 ;
@@ -888,9 +888,8 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
 ; SSE-NEXT: cmpneqps %xmm3, %xmm1
 ; SSE-NEXT: cmpneqps %xmm2, %xmm0
 ; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: packsswb %xmm0, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testw %ax, %ax
 ; SSE-NEXT: setne %al
 ; SSE-NEXT: retq
 ;
@@ -996,18 +995,16 @@ define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SSE-LABEL: bool_reduction_v8i16:
 ; SSE: # %bb.0:
 ; SSE-NEXT: pcmpgtw %xmm0, %xmm1
-; SSE-NEXT: packsswb %xmm1, %xmm1
 ; SSE-NEXT: pmovmskb %xmm1, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testw %ax, %ax
 ; SSE-NEXT: setne %al
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: bool_reduction_v8i16:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testw %ax, %ax
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: retq
 ;
@@ -1123,9 +1120,8 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; SSE-NEXT: pminud %xmm0, %xmm2
 ; SSE-NEXT: pcmpeqd %xmm0, %xmm2
 ; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: packsswb %xmm2, %xmm2
 ; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testw %ax, %ax
 ; SSE-NEXT: setne %al
 ; SSE-NEXT: retq
 ;
@@ -1200,10 +1196,8 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; AVX2-LABEL: bool_reduction_v16i16:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl %eax, %eax
 ; AVX2-NEXT: setne %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -116,9 +116,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT: psllw $15, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: setne %al
 ; SSE2-NEXT: retq
 ;
@@ -126,9 +125,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT: psllw $15, %xmm0
-; SSE41-NEXT: packsswb %xmm0, %xmm0
 ; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: testb %al, %al
+; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE41-NEXT: setne %al
 ; SSE41-NEXT: retq
 ;
@@ -136,9 +134,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: retq
 ;
@@ -269,9 +266,8 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
 ; SSE2-NEXT: psrad $16, %xmm0
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-NEXT: psllw $15, %xmm0
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: setne %al
 ; SSE2-NEXT: retq
 ;
@@ -282,9 +278,8 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
 ; SSE41-NEXT: packusdw %xmm1, %xmm0
 ; SSE41-NEXT: psllw $15, %xmm0
-; SSE41-NEXT: packsswb %xmm0, %xmm0
 ; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: testb %al, %al
+; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE41-NEXT: setne %al
 ; SSE41-NEXT: retq
 ;
@@ -296,9 +291,8 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: testb %al, %al
+; AVX1-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; AVX1-NEXT: setne %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -308,9 +302,8 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testb %al, %al
+; AVX2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; AVX2-NEXT: setne %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -505,9 +498,8 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
 ; SSE2-NEXT: psllw $15, %xmm2
-; SSE2-NEXT: packsswb %xmm2, %xmm2
 ; SSE2-NEXT: pmovmskb %xmm2, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: setne %al
 ; SSE2-NEXT: retq
 ;
@@ -522,9 +514,8 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
 ; SSE41-NEXT: packusdw %xmm1, %xmm0
 ; SSE41-NEXT: packusdw %xmm2, %xmm0
 ; SSE41-NEXT: psllw $15, %xmm0
-; SSE41-NEXT: packsswb %xmm0, %xmm0
 ; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: testb %al, %al
+; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE41-NEXT: setne %al
 ; SSE41-NEXT: retq
 ;
@@ -539,9 +530,8 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: testb %al, %al
+; AVX1-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; AVX1-NEXT: setne %al
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -554,9 +544,8 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
-; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testb %al, %al
+; AVX2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; AVX2-NEXT: setne %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -966,9 +955,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; SSE2-NEXT: pxor %xmm1, %xmm1
 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: packsswb %xmm0, %xmm0
 ; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: setne %al
 ; SSE2-NEXT: retq
 ;
@@ -977,9 +965,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; SSE41-NEXT: pxor %xmm1, %xmm1
 ; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
 ; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
-; SSE41-NEXT: packsswb %xmm0, %xmm0
 ; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: testb %al, %al
+; SSE41-NEXT: testw %ax, %ax
 ; SSE41-NEXT: setne %al
 ; SSE41-NEXT: retq
 ;
@@ -988,9 +975,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: testb %al, %al
+; AVX-NEXT: testw %ax, %ax
 ; AVX-NEXT: setne %al
 ; AVX-NEXT: retq
 ;
@@ -1166,9 +1152,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; SSE-NEXT: pcmpeqd %xmm2, %xmm1
 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
 ; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: packsswb %xmm0, %xmm0
 ; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: testb %al, %al
+; SSE-NEXT: testw %ax, %ax
 ; SSE-NEXT: setne %al
 ; SSE-NEXT: retq
 ;
@@ -1257,10 +1242,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl %eax, %eax
 ; AVX2-NEXT: setne %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -1393,9 +1376,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE2-NEXT: pand %xmm0, %xmm1
 ; SSE2-NEXT: packssdw %xmm2, %xmm1
 ; SSE2-NEXT: packssdw %xmm3, %xmm1
-; SSE2-NEXT: packsswb %xmm1, %xmm1
 ; SSE2-NEXT: pmovmskb %xmm1, %eax
-; SSE2-NEXT: testb %al, %al
+; SSE2-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE2-NEXT: setne %al
 ; SSE2-NEXT: retq
 ;
@@ -1409,9 +1391,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
 ; SSE41-NEXT: packssdw %xmm1, %xmm0
 ; SSE41-NEXT: packssdw %xmm2, %xmm0
-; SSE41-NEXT: packsswb %xmm0, %xmm0
 ; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: testb %al, %al
+; SSE41-NEXT: testl $43690, %eax # imm = 0xAAAA
 ; SSE41-NEXT: setne %al
 ; SSE41-NEXT: retq
 ;
@@ -1518,10 +1499,8 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: testw %ax, %ax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: testl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX2-NEXT: setne %al
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq