mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[X86][SelectionDAG] Add SADDO_CARRY and SSUBO_CARRY to support multipart signed add/sub overflow legalization.
This passes existing X86 test but I'm not sure if it handles all type legalization cases it needs to. Alternative to D89200 Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D89222
This commit is contained in:
parent
2b696bcac5
commit
0120cd1285
@ -283,6 +283,16 @@ enum NodeType {
|
||||
ADDCARRY,
|
||||
SUBCARRY,
|
||||
|
||||
/// Carry-using overflow-aware nodes for multiple precision addition and
|
||||
/// subtraction. These nodes take three operands: The first two are normal lhs
|
||||
/// and rhs to the add or sub, and the third is a boolean indicating if there
|
||||
/// is an incoming carry. They produce two results: the normal result of the
|
||||
/// add or sub, and a boolean that indicates if an overflow occurred (*not* a
|
||||
/// flag, because it may be a store to memory, etc.). If the type of the
|
||||
/// boolean is not i1 then the high bits conform to getBooleanContents.
|
||||
SADDO_CARRY,
|
||||
SSUBO_CARRY,
|
||||
|
||||
/// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
|
||||
/// These nodes take two operands: the normal LHS and RHS to the add. They
|
||||
/// produce two results: the normal result of the add, and a boolean that
|
||||
|
@ -411,9 +411,11 @@ namespace {
|
||||
SDValue visitSUBO(SDNode *N);
|
||||
SDValue visitADDE(SDNode *N);
|
||||
SDValue visitADDCARRY(SDNode *N);
|
||||
SDValue visitSADDO_CARRY(SDNode *N);
|
||||
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
|
||||
SDValue visitSUBE(SDNode *N);
|
||||
SDValue visitSUBCARRY(SDNode *N);
|
||||
SDValue visitSSUBO_CARRY(SDNode *N);
|
||||
SDValue visitMUL(SDNode *N);
|
||||
SDValue visitMULFIX(SDNode *N);
|
||||
SDValue useDivRem(SDNode *N);
|
||||
@ -1600,8 +1602,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
|
||||
case ISD::USUBO: return visitSUBO(N);
|
||||
case ISD::ADDE: return visitADDE(N);
|
||||
case ISD::ADDCARRY: return visitADDCARRY(N);
|
||||
case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
|
||||
case ISD::SUBE: return visitSUBE(N);
|
||||
case ISD::SUBCARRY: return visitSUBCARRY(N);
|
||||
case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
|
||||
case ISD::SMULFIX:
|
||||
case ISD::SMULFIXSAT:
|
||||
case ISD::UMULFIX:
|
||||
@ -2836,6 +2840,28 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Combine an ISD::SADDO_CARRY node. \p N is read as (operand 0, operand 1,
/// carry-in operand 2). Two rewrites are attempted, in this order:
///  - if operand 0 is a ConstantSDNode and operand 1 is not, rebuild the same
///    opcode with the first two operands swapped so the constant is on the RHS;
///  - if the carry-in is a null constant, rebuild the node as ISD::SADDO with
///    the same value-type list, but only when LegalOperations is false or the
///    target reports SADDO as legal-or-custom for the node's first result type.
/// Otherwise an empty SDValue is returned (presumably telling the caller that
/// no combine was performed -- confirm against DAGCombiner::visit's callers).
SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
SDValue CarryIn = N->getOperand(2);
|
||||
SDLoc DL(N);
|
||||
|
||||
// canonicalize constant to RHS
|
||||
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
|
||||
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
|
||||
if (N0C && !N1C)
|
||||
return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
|
||||
|
||||
// fold (saddo_carry x, y, false) -> (saddo x, y)
|
||||
if (isNullConstant(CarryIn)) {
|
||||
if (!LegalOperations ||
|
||||
TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
|
||||
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* If we are facing some sort of diamond carry propagation pattern try to
|
||||
* break it up to generate something like:
|
||||
@ -3517,6 +3543,21 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Combine an ISD::SSUBO_CARRY node. \p N is read as (operand 0, operand 1,
/// carry-in operand 2). The only rewrite attempted here: when the carry-in is
/// a null constant, rebuild the node as ISD::SSUBO with the same value-type
/// list, provided LegalOperations is false or the target reports SSUBO as
/// legal-or-custom for the node's first result type. No operand swap is done
/// for this opcode. Otherwise an empty SDValue is returned (presumably meaning
/// "no combine performed" -- confirm against the caller).
SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
SDValue CarryIn = N->getOperand(2);
|
||||
|
||||
// fold (ssubo_carry x, y, false) -> (ssubo x, y)
|
||||
if (isNullConstant(CarryIn)) {
|
||||
if (!LegalOperations ||
|
||||
TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
|
||||
return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
|
||||
// UMULFIXSAT here.
|
||||
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
|
||||
|
@ -152,6 +152,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::ADDCARRY:
|
||||
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
|
||||
|
||||
case ISD::SADDO_CARRY:
|
||||
case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
|
||||
|
||||
case ISD::SADDSAT:
|
||||
case ISD::UADDSAT:
|
||||
case ISD::SSUBSAT:
|
||||
@ -1288,6 +1291,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
|
||||
return SDValue(Res.getNode(), 0);
|
||||
}
|
||||
|
||||
/// Promote an integer result of a SADDO_CARRY / SSUBO_CARRY node.
/// Only result number 1 is supported; any other \p ResNo trips the assert.
/// The work itself is delegated to PromoteIntRes_Overflow(N).
/// NOTE(review): result 1 is taken to be the overflow boolean, per the ISD
/// opcode description of these nodes -- confirm.
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
|
||||
unsigned ResNo) {
|
||||
assert(ResNo == 1 && "Don't know how to promote other results yet.");
|
||||
return PromoteIntRes_Overflow(N);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
|
||||
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
|
||||
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
|
||||
@ -1470,6 +1479,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::ROTL:
|
||||
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
|
||||
|
||||
case ISD::SADDO_CARRY:
|
||||
case ISD::SSUBO_CARRY:
|
||||
case ISD::ADDCARRY:
|
||||
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
|
||||
|
||||
@ -2087,6 +2098,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::ADDCARRY:
|
||||
case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
|
||||
|
||||
case ISD::SADDO_CARRY:
|
||||
case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
|
||||
|
||||
case ISD::SHL:
|
||||
case ISD::SRA:
|
||||
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
|
||||
@ -2710,6 +2724,26 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
|
||||
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
|
||||
}
|
||||
|
||||
/// Expand the integer result of a SADDO_CARRY / SSUBO_CARRY node into two
/// narrower nodes chained through a carry.
///
/// \param N   the node being expanded; operands 0 and 1 are the values to
///            add/subtract and operand 2 is the incoming carry.
/// \param Lo  set to an ADDCARRY (for SADDO_CARRY) or SUBCARRY (otherwise)
///            node over the LHSL/RHSL parts and N's carry-in.
/// \param Hi  set to a node with N's own opcode over the LHSH/RHSH parts,
///            taking result 1 of \p Lo as its carry-in.
///
/// Both new nodes share one value-type list: the type of LHSL for result 0 and
/// N's original result-1 type for result 1. All uses of N's result 1 are then
/// redirected to result 1 of \p Hi.
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
|
||||
SDValue &Lo, SDValue &Hi) {
|
||||
// Expand the subcomponents.
|
||||
SDValue LHSL, LHSH, RHSL, RHSH;
|
||||
SDLoc dl(N);
|
||||
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
|
||||
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
|
||||
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
|
||||
|
||||
// We need to use an unsigned carry op for the lo part.
|
||||
unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
|
||||
: ISD::SUBCARRY;
|
||||
Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
|
||||
// The hi part keeps the signed opcode and consumes the lo part's result 1.
Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
|
||||
|
||||
// Legalized the flag result - switch anything that used the old flag to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
|
||||
}
|
||||
|
||||
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
|
||||
SDValue &Lo, SDValue &Hi) {
|
||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
@ -3515,40 +3549,66 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
|
||||
SDValue RHS = Node->getOperand(1);
|
||||
SDLoc dl(Node);
|
||||
|
||||
// Expand the result by simply replacing it with the equivalent
|
||||
// non-overflow-checking operation.
|
||||
SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
|
||||
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
|
||||
LHS, RHS);
|
||||
SplitInteger(Sum, Lo, Hi);
|
||||
SDValue Ovf;
|
||||
|
||||
// Compute the overflow.
|
||||
//
|
||||
// LHSSign -> LHS >= 0
|
||||
// RHSSign -> RHS >= 0
|
||||
// SumSign -> Sum >= 0
|
||||
//
|
||||
// Add:
|
||||
// Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
|
||||
// Sub:
|
||||
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
|
||||
//
|
||||
EVT OType = Node->getValueType(1);
|
||||
SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
|
||||
unsigned CarryOp;
|
||||
switch(Node->getOpcode()) {
|
||||
default: llvm_unreachable("Node has unexpected Opcode");
|
||||
case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break;
|
||||
case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break;
|
||||
}
|
||||
|
||||
SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
|
||||
SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
|
||||
SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
|
||||
Node->getOpcode() == ISD::SADDO ?
|
||||
ISD::SETEQ : ISD::SETNE);
|
||||
bool HasCarryOp = TLI.isOperationLegalOrCustom(
|
||||
CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
|
||||
|
||||
SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
|
||||
SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
|
||||
if (HasCarryOp) {
|
||||
// Expand the subcomponents.
|
||||
SDValue LHSL, LHSH, RHSL, RHSH;
|
||||
GetExpandedInteger(LHS, LHSL, LHSH);
|
||||
GetExpandedInteger(RHS, RHSL, RHSH);
|
||||
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));
|
||||
|
||||
SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
|
||||
Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
|
||||
ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL });
|
||||
Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
|
||||
|
||||
Ovf = Hi.getValue(1);
|
||||
} else {
|
||||
// Expand the result by simply replacing it with the equivalent
|
||||
// non-overflow-checking operation.
|
||||
SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
|
||||
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
|
||||
LHS, RHS);
|
||||
SplitInteger(Sum, Lo, Hi);
|
||||
|
||||
// Compute the overflow.
|
||||
//
|
||||
// LHSSign -> LHS >= 0
|
||||
// RHSSign -> RHS >= 0
|
||||
// SumSign -> Sum >= 0
|
||||
//
|
||||
// Add:
|
||||
// Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
|
||||
// Sub:
|
||||
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
|
||||
//
|
||||
EVT OType = Node->getValueType(1);
|
||||
SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
|
||||
|
||||
SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
|
||||
SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
|
||||
SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
|
||||
Node->getOpcode() == ISD::SADDO ?
|
||||
ISD::SETEQ : ISD::SETNE);
|
||||
|
||||
SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
|
||||
SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
|
||||
|
||||
Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
|
||||
}
|
||||
|
||||
// Use the calculated overflow everywhere.
|
||||
ReplaceValueWith(SDValue(Node, 1), Cmp);
|
||||
ReplaceValueWith(SDValue(Node, 1), Ovf);
|
||||
}
|
||||
|
||||
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
|
||||
|
@ -337,6 +337,7 @@ private:
|
||||
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
|
||||
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
|
||||
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
|
||||
SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo);
|
||||
SDValue PromoteIntRes_UNDEF(SDNode *N);
|
||||
SDValue PromoteIntRes_VAARG(SDNode *N);
|
||||
SDValue PromoteIntRes_VSCALE(SDNode *N);
|
||||
@ -429,6 +430,7 @@ private:
|
||||
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
@ -293,6 +293,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::ADDC: return "addc";
|
||||
case ISD::ADDE: return "adde";
|
||||
case ISD::ADDCARRY: return "addcarry";
|
||||
case ISD::SADDO_CARRY: return "saddo_carry";
|
||||
case ISD::SADDO: return "saddo";
|
||||
case ISD::UADDO: return "uaddo";
|
||||
case ISD::SSUBO: return "ssubo";
|
||||
@ -302,6 +303,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::SUBC: return "subc";
|
||||
case ISD::SUBE: return "sube";
|
||||
case ISD::SUBCARRY: return "subcarry";
|
||||
case ISD::SSUBO_CARRY: return "ssubo_carry";
|
||||
case ISD::SHL_PARTS: return "shl_parts";
|
||||
case ISD::SRA_PARTS: return "sra_parts";
|
||||
case ISD::SRL_PARTS: return "srl_parts";
|
||||
|
@ -680,6 +680,8 @@ void TargetLoweringBase::initActions() {
|
||||
setOperationAction(ISD::ADDCARRY, VT, Expand);
|
||||
setOperationAction(ISD::SUBCARRY, VT, Expand);
|
||||
setOperationAction(ISD::SETCCCARRY, VT, Expand);
|
||||
setOperationAction(ISD::SADDO_CARRY, VT, Expand);
|
||||
setOperationAction(ISD::SSUBO_CARRY, VT, Expand);
|
||||
|
||||
// ADDC/ADDE/SUBC/SUBE default to expand.
|
||||
setOperationAction(ISD::ADDC, VT, Expand);
|
||||
|
@ -1914,6 +1914,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::ADDCARRY, VT, Custom);
|
||||
setOperationAction(ISD::SUBCARRY, VT, Custom);
|
||||
setOperationAction(ISD::SETCCCARRY, VT, Custom);
|
||||
setOperationAction(ISD::SADDO_CARRY, VT, Custom);
|
||||
setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
|
||||
}
|
||||
|
||||
if (!Subtarget.is64Bit()) {
|
||||
@ -29241,6 +29243,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
|
||||
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
|
||||
SDNode *N = Op.getNode();
|
||||
MVT VT = N->getSimpleValueType(0);
|
||||
unsigned Opc = Op.getOpcode();
|
||||
|
||||
// Let legalize expand this if it isn't a legal type yet.
|
||||
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
|
||||
@ -29255,11 +29258,14 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
|
||||
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
|
||||
Carry, DAG.getAllOnesConstant(DL, CarryVT));
|
||||
|
||||
unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB;
|
||||
SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0),
|
||||
Op.getOperand(1), Carry.getValue(1));
|
||||
bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY;
|
||||
SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
|
||||
Op.getOperand(0), Op.getOperand(1),
|
||||
Carry.getValue(1));
|
||||
|
||||
SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG);
|
||||
bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY;
|
||||
SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
|
||||
Sum.getValue(1), DL, DAG);
|
||||
if (N->getValueType(1) == MVT::i1)
|
||||
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
|
||||
|
||||
@ -29784,6 +29790,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::UMULO: return LowerXALUO(Op, DAG);
|
||||
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
|
||||
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
|
||||
case ISD::SADDO_CARRY:
|
||||
case ISD::SSUBO_CARRY:
|
||||
case ISD::ADDCARRY:
|
||||
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
|
||||
case ISD::ADD:
|
||||
|
@ -139,26 +139,12 @@ define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
|
||||
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
|
||||
; X32-LABEL: knownbits_uaddo_saddo:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: pushl %ebx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl %ecx, %edx
|
||||
; X32-NEXT: addl %eax, %edx
|
||||
; X32-NEXT: setb %bl
|
||||
; X32-NEXT: testl %eax, %eax
|
||||
; X32-NEXT: setns %al
|
||||
; X32-NEXT: testl %ecx, %ecx
|
||||
; X32-NEXT: setns %cl
|
||||
; X32-NEXT: cmpb %al, %cl
|
||||
; X32-NEXT: sete %al
|
||||
; X32-NEXT: testl %edx, %edx
|
||||
; X32-NEXT: setns %dl
|
||||
; X32-NEXT: cmpb %dl, %cl
|
||||
; X32-NEXT: setne %dl
|
||||
; X32-NEXT: andb %al, %dl
|
||||
; X32-NEXT: orb %bl, %dl
|
||||
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: setb %al
|
||||
; X32-NEXT: seto %dl
|
||||
; X32-NEXT: orb %al, %dl
|
||||
; X32-NEXT: xorl %eax, %eax
|
||||
; X32-NEXT: popl %ebx
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: knownbits_uaddo_saddo:
|
||||
@ -191,20 +177,10 @@ define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
|
||||
; X32-LABEL: knownbits_usubo_ssubo:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: cmpl %eax, %ecx
|
||||
; X32-NEXT: setb %dh
|
||||
; X32-NEXT: setns %dl
|
||||
; X32-NEXT: testl %ecx, %ecx
|
||||
; X32-NEXT: setns %cl
|
||||
; X32-NEXT: cmpb %dl, %cl
|
||||
; X32-NEXT: setne %ch
|
||||
; X32-NEXT: testl %eax, %eax
|
||||
; X32-NEXT: setns %al
|
||||
; X32-NEXT: cmpb %al, %cl
|
||||
; X32-NEXT: setne %dl
|
||||
; X32-NEXT: andb %ch, %dl
|
||||
; X32-NEXT: orb %dh, %dl
|
||||
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: setb %al
|
||||
; X32-NEXT: seto %dl
|
||||
; X32-NEXT: orb %al, %dl
|
||||
; X32-NEXT: xorl %eax, %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
|
@ -42,38 +42,25 @@ define i32 @func(i32 %x, i32 %y) nounwind {
|
||||
define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||
; X86-LABEL: func2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl %ebx, %ebp
|
||||
; X86-NEXT: adcl %esi, %ebp
|
||||
; X86-NEXT: movl %ebp, %eax
|
||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: seto %bl
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: sarl $31, %eax
|
||||
; X86-NEXT: xorl %ecx, %ecx
|
||||
; X86-NEXT: testl %ebp, %ebp
|
||||
; X86-NEXT: setns %cl
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
||||
; X86-NEXT: testl %ebx, %ebx
|
||||
; X86-NEXT: setns %bl
|
||||
; X86-NEXT: cmpb %cl, %bl
|
||||
; X86-NEXT: setne %cl
|
||||
; X86-NEXT: testb %bl, %bl
|
||||
; X86-NEXT: cmovel %ecx, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: testl %esi, %esi
|
||||
; X86-NEXT: setns %ch
|
||||
; X86-NEXT: cmpb %ch, %bl
|
||||
; X86-NEXT: sete %ch
|
||||
; X86-NEXT: testb %cl, %ch
|
||||
; X86-NEXT: cmovel %ebp, %edx
|
||||
; X86-NEXT: cmovel %edi, %eax
|
||||
; X86-NEXT: setns %dl
|
||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
||||
; X86-NEXT: testb %bl, %bl
|
||||
; X86-NEXT: cmovel %esi, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: func2:
|
||||
|
@ -44,38 +44,25 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-LABEL: func64:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl %ebx, %ebp
|
||||
; X86-NEXT: adcl %esi, %ebp
|
||||
; X86-NEXT: movl %ebp, %eax
|
||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: seto %bl
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: sarl $31, %eax
|
||||
; X86-NEXT: xorl %ecx, %ecx
|
||||
; X86-NEXT: testl %ebp, %ebp
|
||||
; X86-NEXT: setns %cl
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
||||
; X86-NEXT: testl %ebx, %ebx
|
||||
; X86-NEXT: setns %bl
|
||||
; X86-NEXT: cmpb %cl, %bl
|
||||
; X86-NEXT: setne %cl
|
||||
; X86-NEXT: testb %bl, %bl
|
||||
; X86-NEXT: cmovel %ecx, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: testl %esi, %esi
|
||||
; X86-NEXT: setns %ch
|
||||
; X86-NEXT: cmpb %ch, %bl
|
||||
; X86-NEXT: sete %ch
|
||||
; X86-NEXT: testb %cl, %ch
|
||||
; X86-NEXT: cmovel %ebp, %edx
|
||||
; X86-NEXT: cmovel %edi, %eax
|
||||
; X86-NEXT: setns %dl
|
||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
||||
; X86-NEXT: testb %bl, %bl
|
||||
; X86-NEXT: cmovel %esi, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: func64:
|
||||
|
@ -1940,124 +1940,78 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
|
||||
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
|
||||
; SSE-LABEL: v2i128:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pushq %r15
|
||||
; SSE-NEXT: pushq %r14
|
||||
; SSE-NEXT: pushq %r13
|
||||
; SSE-NEXT: pushq %r12
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: movq %rdi, %rax
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
|
||||
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE-NEXT: movq %r8, %r13
|
||||
; SSE-NEXT: adcq %r14, %r13
|
||||
; SSE-NEXT: movq %r13, %r10
|
||||
; SSE-NEXT: sarq $63, %r10
|
||||
; SSE-NEXT: xorl %edi, %edi
|
||||
; SSE-NEXT: testq %r13, %r13
|
||||
; SSE-NEXT: setns %dil
|
||||
; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; SSE-NEXT: leaq (%rdi,%r12), %r15
|
||||
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
|
||||
; SSE-NEXT: seto %r10b
|
||||
; SSE-NEXT: movq %r8, %rbx
|
||||
; SSE-NEXT: sarq $63, %rbx
|
||||
; SSE-NEXT: testb %r10b, %r10b
|
||||
; SSE-NEXT: cmoveq %rcx, %rbx
|
||||
; SSE-NEXT: xorl %ecx, %ecx
|
||||
; SSE-NEXT: testq %r8, %r8
|
||||
; SSE-NEXT: setns %r8b
|
||||
; SSE-NEXT: cmpb %dil, %r8b
|
||||
; SSE-NEXT: setne %dil
|
||||
; SSE-NEXT: testq %r14, %r14
|
||||
; SSE-NEXT: setns %bl
|
||||
; SSE-NEXT: cmpb %bl, %r8b
|
||||
; SSE-NEXT: sete %bl
|
||||
; SSE-NEXT: testb %dil, %bl
|
||||
; SSE-NEXT: cmoveq %r13, %r15
|
||||
; SSE-NEXT: cmoveq %rcx, %r10
|
||||
; SSE-NEXT: setns %cl
|
||||
; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; SSE-NEXT: addq %r11, %rcx
|
||||
; SSE-NEXT: testb %r10b, %r10b
|
||||
; SSE-NEXT: cmoveq %r8, %rcx
|
||||
; SSE-NEXT: addq %r9, %rsi
|
||||
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
|
||||
; SSE-NEXT: seto %r8b
|
||||
; SSE-NEXT: movq %rdx, %rdi
|
||||
; SSE-NEXT: adcq %r11, %rdi
|
||||
; SSE-NEXT: setns %bl
|
||||
; SSE-NEXT: movzbl %bl, %ebx
|
||||
; SSE-NEXT: addq %rbx, %r12
|
||||
; SSE-NEXT: movq %rdi, %rcx
|
||||
; SSE-NEXT: sarq $63, %rcx
|
||||
; SSE-NEXT: testq %r11, %r11
|
||||
; SSE-NEXT: setns %r8b
|
||||
; SSE-NEXT: sarq $63, %rdi
|
||||
; SSE-NEXT: testb %r8b, %r8b
|
||||
; SSE-NEXT: cmoveq %rsi, %rdi
|
||||
; SSE-NEXT: xorl %esi, %esi
|
||||
; SSE-NEXT: testq %rdx, %rdx
|
||||
; SSE-NEXT: setns %dl
|
||||
; SSE-NEXT: cmpb %r8b, %dl
|
||||
; SSE-NEXT: sete %r8b
|
||||
; SSE-NEXT: cmpb %bl, %dl
|
||||
; SSE-NEXT: setne %dl
|
||||
; SSE-NEXT: testb %dl, %r8b
|
||||
; SSE-NEXT: cmoveq %rsi, %rcx
|
||||
; SSE-NEXT: cmoveq %rdi, %r12
|
||||
; SSE-NEXT: movq %r15, 24(%rax)
|
||||
; SSE-NEXT: movq %r10, 16(%rax)
|
||||
; SSE-NEXT: movq %r12, 8(%rax)
|
||||
; SSE-NEXT: movq %rcx, (%rax)
|
||||
; SSE-NEXT: setns %sil
|
||||
; SSE-NEXT: addq %r11, %rsi
|
||||
; SSE-NEXT: testb %r8b, %r8b
|
||||
; SSE-NEXT: cmoveq %rdx, %rsi
|
||||
; SSE-NEXT: movq %rbx, 16(%rax)
|
||||
; SSE-NEXT: movq %rdi, (%rax)
|
||||
; SSE-NEXT: movq %rcx, 24(%rax)
|
||||
; SSE-NEXT: movq %rsi, 8(%rax)
|
||||
; SSE-NEXT: popq %rbx
|
||||
; SSE-NEXT: popq %r12
|
||||
; SSE-NEXT: popq %r13
|
||||
; SSE-NEXT: popq %r14
|
||||
; SSE-NEXT: popq %r15
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: v2i128:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: pushq %r15
|
||||
; AVX-NEXT: pushq %r14
|
||||
; AVX-NEXT: pushq %r13
|
||||
; AVX-NEXT: pushq %r12
|
||||
; AVX-NEXT: pushq %rbx
|
||||
; AVX-NEXT: movq %rdi, %rax
|
||||
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14
|
||||
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX-NEXT: movq %r8, %r13
|
||||
; AVX-NEXT: adcq %r14, %r13
|
||||
; AVX-NEXT: movq %r13, %r10
|
||||
; AVX-NEXT: sarq $63, %r10
|
||||
; AVX-NEXT: xorl %edi, %edi
|
||||
; AVX-NEXT: testq %r13, %r13
|
||||
; AVX-NEXT: setns %dil
|
||||
; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; AVX-NEXT: leaq (%rdi,%r12), %r15
|
||||
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
|
||||
; AVX-NEXT: seto %r10b
|
||||
; AVX-NEXT: movq %r8, %rbx
|
||||
; AVX-NEXT: sarq $63, %rbx
|
||||
; AVX-NEXT: testb %r10b, %r10b
|
||||
; AVX-NEXT: cmoveq %rcx, %rbx
|
||||
; AVX-NEXT: xorl %ecx, %ecx
|
||||
; AVX-NEXT: testq %r8, %r8
|
||||
; AVX-NEXT: setns %r8b
|
||||
; AVX-NEXT: cmpb %dil, %r8b
|
||||
; AVX-NEXT: setne %dil
|
||||
; AVX-NEXT: testq %r14, %r14
|
||||
; AVX-NEXT: setns %bl
|
||||
; AVX-NEXT: cmpb %bl, %r8b
|
||||
; AVX-NEXT: sete %bl
|
||||
; AVX-NEXT: testb %dil, %bl
|
||||
; AVX-NEXT: cmoveq %r13, %r15
|
||||
; AVX-NEXT: cmoveq %rcx, %r10
|
||||
; AVX-NEXT: setns %cl
|
||||
; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; AVX-NEXT: addq %r11, %rcx
|
||||
; AVX-NEXT: testb %r10b, %r10b
|
||||
; AVX-NEXT: cmoveq %r8, %rcx
|
||||
; AVX-NEXT: addq %r9, %rsi
|
||||
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX-NEXT: seto %r8b
|
||||
; AVX-NEXT: movq %rdx, %rdi
|
||||
; AVX-NEXT: adcq %r11, %rdi
|
||||
; AVX-NEXT: setns %bl
|
||||
; AVX-NEXT: movzbl %bl, %ebx
|
||||
; AVX-NEXT: addq %rbx, %r12
|
||||
; AVX-NEXT: movq %rdi, %rcx
|
||||
; AVX-NEXT: sarq $63, %rcx
|
||||
; AVX-NEXT: testq %r11, %r11
|
||||
; AVX-NEXT: setns %r8b
|
||||
; AVX-NEXT: sarq $63, %rdi
|
||||
; AVX-NEXT: testb %r8b, %r8b
|
||||
; AVX-NEXT: cmoveq %rsi, %rdi
|
||||
; AVX-NEXT: xorl %esi, %esi
|
||||
; AVX-NEXT: testq %rdx, %rdx
|
||||
; AVX-NEXT: setns %dl
|
||||
; AVX-NEXT: cmpb %r8b, %dl
|
||||
; AVX-NEXT: sete %r8b
|
||||
; AVX-NEXT: cmpb %bl, %dl
|
||||
; AVX-NEXT: setne %dl
|
||||
; AVX-NEXT: testb %dl, %r8b
|
||||
; AVX-NEXT: cmoveq %rsi, %rcx
|
||||
; AVX-NEXT: cmoveq %rdi, %r12
|
||||
; AVX-NEXT: movq %r15, 24(%rax)
|
||||
; AVX-NEXT: movq %r10, 16(%rax)
|
||||
; AVX-NEXT: movq %r12, 8(%rax)
|
||||
; AVX-NEXT: movq %rcx, (%rax)
|
||||
; AVX-NEXT: setns %sil
|
||||
; AVX-NEXT: addq %r11, %rsi
|
||||
; AVX-NEXT: testb %r8b, %r8b
|
||||
; AVX-NEXT: cmoveq %rdx, %rsi
|
||||
; AVX-NEXT: movq %rbx, 16(%rax)
|
||||
; AVX-NEXT: movq %rdi, (%rax)
|
||||
; AVX-NEXT: movq %rcx, 24(%rax)
|
||||
; AVX-NEXT: movq %rsi, 8(%rax)
|
||||
; AVX-NEXT: popq %rbx
|
||||
; AVX-NEXT: popq %r12
|
||||
; AVX-NEXT: popq %r13
|
||||
; AVX-NEXT: popq %r14
|
||||
; AVX-NEXT: popq %r15
|
||||
; AVX-NEXT: retq
|
||||
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
|
||||
ret <2 x i128> %z
|
||||
|
@ -38,38 +38,25 @@ define i32 @func(i32 %x, i32 %y) nounwind {
|
||||
define i64 @func2(i64 %x, i64 %y) nounwind {
|
||||
; X86-LABEL: func2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl %ebx, %ebp
|
||||
; X86-NEXT: sbbl %esi, %ebp
|
||||
; X86-NEXT: movl %ebp, %eax
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: seto %bl
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: sarl $31, %eax
|
||||
; X86-NEXT: xorl %ecx, %ecx
|
||||
; X86-NEXT: testl %ebp, %ebp
|
||||
; X86-NEXT: setns %cl
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
||||
; X86-NEXT: testl %ebx, %ebx
|
||||
; X86-NEXT: setns %bl
|
||||
; X86-NEXT: cmpb %cl, %bl
|
||||
; X86-NEXT: setne %cl
|
||||
; X86-NEXT: testb %bl, %bl
|
||||
; X86-NEXT: cmovel %ecx, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: testl %esi, %esi
|
||||
; X86-NEXT: setns %ch
|
||||
; X86-NEXT: cmpb %ch, %bl
|
||||
; X86-NEXT: setne %ch
|
||||
; X86-NEXT: testb %cl, %ch
|
||||
; X86-NEXT: cmovel %ebp, %edx
|
||||
; X86-NEXT: cmovel %edi, %eax
|
||||
; X86-NEXT: setns %dl
|
||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
||||
; X86-NEXT: testb %bl, %bl
|
||||
; X86-NEXT: cmovel %esi, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: func2:
|
||||
|
@ -40,38 +40,25 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
|
||||
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
|
||||
; X86-LABEL: func64:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl %ebx, %ebp
|
||||
; X86-NEXT: sbbl %esi, %ebp
|
||||
; X86-NEXT: movl %ebp, %eax
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: seto %bl
|
||||
; X86-NEXT: movl %esi, %eax
|
||||
; X86-NEXT: sarl $31, %eax
|
||||
; X86-NEXT: xorl %ecx, %ecx
|
||||
; X86-NEXT: testl %ebp, %ebp
|
||||
; X86-NEXT: setns %cl
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
||||
; X86-NEXT: testl %ebx, %ebx
|
||||
; X86-NEXT: setns %bl
|
||||
; X86-NEXT: cmpb %cl, %bl
|
||||
; X86-NEXT: setne %cl
|
||||
; X86-NEXT: testb %bl, %bl
|
||||
; X86-NEXT: cmovel %ecx, %eax
|
||||
; X86-NEXT: xorl %edx, %edx
|
||||
; X86-NEXT: testl %esi, %esi
|
||||
; X86-NEXT: setns %ch
|
||||
; X86-NEXT: cmpb %ch, %bl
|
||||
; X86-NEXT: setne %ch
|
||||
; X86-NEXT: testb %cl, %ch
|
||||
; X86-NEXT: cmovel %ebp, %edx
|
||||
; X86-NEXT: cmovel %edi, %eax
|
||||
; X86-NEXT: setns %dl
|
||||
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
|
||||
; X86-NEXT: testb %bl, %bl
|
||||
; X86-NEXT: cmovel %esi, %edx
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: func64:
|
||||
|
@ -2145,124 +2145,78 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
|
||||
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
|
||||
; SSE-LABEL: v2i128:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pushq %r15
|
||||
; SSE-NEXT: pushq %r14
|
||||
; SSE-NEXT: pushq %r13
|
||||
; SSE-NEXT: pushq %r12
|
||||
; SSE-NEXT: pushq %rbx
|
||||
; SSE-NEXT: movq %rdi, %rax
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
|
||||
; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE-NEXT: movq %r8, %r13
|
||||
; SSE-NEXT: sbbq %r14, %r13
|
||||
; SSE-NEXT: movq %r13, %r10
|
||||
; SSE-NEXT: sarq $63, %r10
|
||||
; SSE-NEXT: xorl %edi, %edi
|
||||
; SSE-NEXT: testq %r13, %r13
|
||||
; SSE-NEXT: setns %dil
|
||||
; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; SSE-NEXT: leaq (%rdi,%r12), %r15
|
||||
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
|
||||
; SSE-NEXT: seto %r10b
|
||||
; SSE-NEXT: movq %r8, %rbx
|
||||
; SSE-NEXT: sarq $63, %rbx
|
||||
; SSE-NEXT: testb %r10b, %r10b
|
||||
; SSE-NEXT: cmoveq %rcx, %rbx
|
||||
; SSE-NEXT: xorl %ecx, %ecx
|
||||
; SSE-NEXT: testq %r8, %r8
|
||||
; SSE-NEXT: setns %r8b
|
||||
; SSE-NEXT: cmpb %dil, %r8b
|
||||
; SSE-NEXT: setne %dil
|
||||
; SSE-NEXT: testq %r14, %r14
|
||||
; SSE-NEXT: setns %bl
|
||||
; SSE-NEXT: cmpb %bl, %r8b
|
||||
; SSE-NEXT: setne %bl
|
||||
; SSE-NEXT: testb %dil, %bl
|
||||
; SSE-NEXT: cmoveq %r13, %r15
|
||||
; SSE-NEXT: cmoveq %rcx, %r10
|
||||
; SSE-NEXT: setns %cl
|
||||
; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; SSE-NEXT: addq %r11, %rcx
|
||||
; SSE-NEXT: testb %r10b, %r10b
|
||||
; SSE-NEXT: cmoveq %r8, %rcx
|
||||
; SSE-NEXT: subq %r9, %rsi
|
||||
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
|
||||
; SSE-NEXT: seto %r8b
|
||||
; SSE-NEXT: movq %rdx, %rdi
|
||||
; SSE-NEXT: sbbq %r11, %rdi
|
||||
; SSE-NEXT: setns %bl
|
||||
; SSE-NEXT: movzbl %bl, %ebx
|
||||
; SSE-NEXT: addq %rbx, %r12
|
||||
; SSE-NEXT: movq %rdi, %rcx
|
||||
; SSE-NEXT: sarq $63, %rcx
|
||||
; SSE-NEXT: testq %r11, %r11
|
||||
; SSE-NEXT: setns %r8b
|
||||
; SSE-NEXT: sarq $63, %rdi
|
||||
; SSE-NEXT: testb %r8b, %r8b
|
||||
; SSE-NEXT: cmoveq %rsi, %rdi
|
||||
; SSE-NEXT: xorl %esi, %esi
|
||||
; SSE-NEXT: testq %rdx, %rdx
|
||||
; SSE-NEXT: setns %dl
|
||||
; SSE-NEXT: cmpb %r8b, %dl
|
||||
; SSE-NEXT: setne %r8b
|
||||
; SSE-NEXT: cmpb %bl, %dl
|
||||
; SSE-NEXT: setne %dl
|
||||
; SSE-NEXT: testb %dl, %r8b
|
||||
; SSE-NEXT: cmoveq %rsi, %rcx
|
||||
; SSE-NEXT: cmoveq %rdi, %r12
|
||||
; SSE-NEXT: movq %r15, 24(%rax)
|
||||
; SSE-NEXT: movq %r10, 16(%rax)
|
||||
; SSE-NEXT: movq %r12, 8(%rax)
|
||||
; SSE-NEXT: movq %rcx, (%rax)
|
||||
; SSE-NEXT: setns %sil
|
||||
; SSE-NEXT: addq %r11, %rsi
|
||||
; SSE-NEXT: testb %r8b, %r8b
|
||||
; SSE-NEXT: cmoveq %rdx, %rsi
|
||||
; SSE-NEXT: movq %rbx, 16(%rax)
|
||||
; SSE-NEXT: movq %rdi, (%rax)
|
||||
; SSE-NEXT: movq %rcx, 24(%rax)
|
||||
; SSE-NEXT: movq %rsi, 8(%rax)
|
||||
; SSE-NEXT: popq %rbx
|
||||
; SSE-NEXT: popq %r12
|
||||
; SSE-NEXT: popq %r13
|
||||
; SSE-NEXT: popq %r14
|
||||
; SSE-NEXT: popq %r15
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: v2i128:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: pushq %r15
|
||||
; AVX-NEXT: pushq %r14
|
||||
; AVX-NEXT: pushq %r13
|
||||
; AVX-NEXT: pushq %r12
|
||||
; AVX-NEXT: pushq %rbx
|
||||
; AVX-NEXT: movq %rdi, %rax
|
||||
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14
|
||||
; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX-NEXT: movq %r8, %r13
|
||||
; AVX-NEXT: sbbq %r14, %r13
|
||||
; AVX-NEXT: movq %r13, %r10
|
||||
; AVX-NEXT: sarq $63, %r10
|
||||
; AVX-NEXT: xorl %edi, %edi
|
||||
; AVX-NEXT: testq %r13, %r13
|
||||
; AVX-NEXT: setns %dil
|
||||
; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; AVX-NEXT: leaq (%rdi,%r12), %r15
|
||||
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
|
||||
; AVX-NEXT: seto %r10b
|
||||
; AVX-NEXT: movq %r8, %rbx
|
||||
; AVX-NEXT: sarq $63, %rbx
|
||||
; AVX-NEXT: testb %r10b, %r10b
|
||||
; AVX-NEXT: cmoveq %rcx, %rbx
|
||||
; AVX-NEXT: xorl %ecx, %ecx
|
||||
; AVX-NEXT: testq %r8, %r8
|
||||
; AVX-NEXT: setns %r8b
|
||||
; AVX-NEXT: cmpb %dil, %r8b
|
||||
; AVX-NEXT: setne %dil
|
||||
; AVX-NEXT: testq %r14, %r14
|
||||
; AVX-NEXT: setns %bl
|
||||
; AVX-NEXT: cmpb %bl, %r8b
|
||||
; AVX-NEXT: setne %bl
|
||||
; AVX-NEXT: testb %dil, %bl
|
||||
; AVX-NEXT: cmoveq %r13, %r15
|
||||
; AVX-NEXT: cmoveq %rcx, %r10
|
||||
; AVX-NEXT: setns %cl
|
||||
; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
|
||||
; AVX-NEXT: addq %r11, %rcx
|
||||
; AVX-NEXT: testb %r10b, %r10b
|
||||
; AVX-NEXT: cmoveq %r8, %rcx
|
||||
; AVX-NEXT: subq %r9, %rsi
|
||||
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX-NEXT: seto %r8b
|
||||
; AVX-NEXT: movq %rdx, %rdi
|
||||
; AVX-NEXT: sbbq %r11, %rdi
|
||||
; AVX-NEXT: setns %bl
|
||||
; AVX-NEXT: movzbl %bl, %ebx
|
||||
; AVX-NEXT: addq %rbx, %r12
|
||||
; AVX-NEXT: movq %rdi, %rcx
|
||||
; AVX-NEXT: sarq $63, %rcx
|
||||
; AVX-NEXT: testq %r11, %r11
|
||||
; AVX-NEXT: setns %r8b
|
||||
; AVX-NEXT: sarq $63, %rdi
|
||||
; AVX-NEXT: testb %r8b, %r8b
|
||||
; AVX-NEXT: cmoveq %rsi, %rdi
|
||||
; AVX-NEXT: xorl %esi, %esi
|
||||
; AVX-NEXT: testq %rdx, %rdx
|
||||
; AVX-NEXT: setns %dl
|
||||
; AVX-NEXT: cmpb %r8b, %dl
|
||||
; AVX-NEXT: setne %r8b
|
||||
; AVX-NEXT: cmpb %bl, %dl
|
||||
; AVX-NEXT: setne %dl
|
||||
; AVX-NEXT: testb %dl, %r8b
|
||||
; AVX-NEXT: cmoveq %rsi, %rcx
|
||||
; AVX-NEXT: cmoveq %rdi, %r12
|
||||
; AVX-NEXT: movq %r15, 24(%rax)
|
||||
; AVX-NEXT: movq %r10, 16(%rax)
|
||||
; AVX-NEXT: movq %r12, 8(%rax)
|
||||
; AVX-NEXT: movq %rcx, (%rax)
|
||||
; AVX-NEXT: setns %sil
|
||||
; AVX-NEXT: addq %r11, %rsi
|
||||
; AVX-NEXT: testb %r8b, %r8b
|
||||
; AVX-NEXT: cmoveq %rdx, %rsi
|
||||
; AVX-NEXT: movq %rbx, 16(%rax)
|
||||
; AVX-NEXT: movq %rdi, (%rax)
|
||||
; AVX-NEXT: movq %rcx, 24(%rax)
|
||||
; AVX-NEXT: movq %rsi, 8(%rax)
|
||||
; AVX-NEXT: popq %rbx
|
||||
; AVX-NEXT: popq %r12
|
||||
; AVX-NEXT: popq %r13
|
||||
; AVX-NEXT: popq %r14
|
||||
; AVX-NEXT: popq %r15
|
||||
; AVX-NEXT: retq
|
||||
%z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
|
||||
ret <2 x i128> %z
|
||||
|
@ -1145,275 +1145,131 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||
define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
|
||||
; SSE2-LABEL: saddo_v2i128:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pushq %rbp
|
||||
; SSE2-NEXT: pushq %rbx
|
||||
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE2-NEXT: testq %r9, %r9
|
||||
; SSE2-NEXT: setns %al
|
||||
; SSE2-NEXT: testq %rsi, %rsi
|
||||
; SSE2-NEXT: setns %bl
|
||||
; SSE2-NEXT: cmpb %al, %bl
|
||||
; SSE2-NEXT: sete %bpl
|
||||
; SSE2-NEXT: addq %r8, %rdi
|
||||
; SSE2-NEXT: adcq %r9, %rsi
|
||||
; SSE2-NEXT: setns %al
|
||||
; SSE2-NEXT: cmpb %al, %bl
|
||||
; SSE2-NEXT: setne %al
|
||||
; SSE2-NEXT: andb %bpl, %al
|
||||
; SSE2-NEXT: seto %r8b
|
||||
; SSE2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
|
||||
; SSE2-NEXT: movq %rcx, %rbp
|
||||
; SSE2-NEXT: adcq %r10, %rbp
|
||||
; SSE2-NEXT: setns %bl
|
||||
; SSE2-NEXT: testq %rcx, %rcx
|
||||
; SSE2-NEXT: setns %cl
|
||||
; SSE2-NEXT: cmpb %bl, %cl
|
||||
; SSE2-NEXT: setne %r8b
|
||||
; SSE2-NEXT: testq %r10, %r10
|
||||
; SSE2-NEXT: setns %bl
|
||||
; SSE2-NEXT: cmpb %bl, %cl
|
||||
; SSE2-NEXT: sete %cl
|
||||
; SSE2-NEXT: andb %r8b, %cl
|
||||
; SSE2-NEXT: movzbl %cl, %ecx
|
||||
; SSE2-NEXT: negl %ecx
|
||||
; SSE2-NEXT: movd %ecx, %xmm1
|
||||
; SSE2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE2-NEXT: seto %al
|
||||
; SSE2-NEXT: movzbl %al, %eax
|
||||
; SSE2-NEXT: negl %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: movzbl %r8b, %eax
|
||||
; SSE2-NEXT: negl %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: movq %rdx, 16(%r11)
|
||||
; SSE2-NEXT: movq %rdi, (%r11)
|
||||
; SSE2-NEXT: movq %rbp, 24(%r11)
|
||||
; SSE2-NEXT: movq %rsi, 8(%r11)
|
||||
; SSE2-NEXT: popq %rbx
|
||||
; SSE2-NEXT: popq %rbp
|
||||
; SSE2-NEXT: movq %rdx, 16(%r10)
|
||||
; SSE2-NEXT: movq %rdi, (%r10)
|
||||
; SSE2-NEXT: movq %rcx, 24(%r10)
|
||||
; SSE2-NEXT: movq %rsi, 8(%r10)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: saddo_v2i128:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: pushq %rbp
|
||||
; SSSE3-NEXT: pushq %rbx
|
||||
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSSE3-NEXT: testq %r9, %r9
|
||||
; SSSE3-NEXT: setns %al
|
||||
; SSSE3-NEXT: testq %rsi, %rsi
|
||||
; SSSE3-NEXT: setns %bl
|
||||
; SSSE3-NEXT: cmpb %al, %bl
|
||||
; SSSE3-NEXT: sete %bpl
|
||||
; SSSE3-NEXT: addq %r8, %rdi
|
||||
; SSSE3-NEXT: adcq %r9, %rsi
|
||||
; SSSE3-NEXT: setns %al
|
||||
; SSSE3-NEXT: cmpb %al, %bl
|
||||
; SSSE3-NEXT: setne %al
|
||||
; SSSE3-NEXT: andb %bpl, %al
|
||||
; SSSE3-NEXT: seto %r8b
|
||||
; SSSE3-NEXT: addq {{[0-9]+}}(%rsp), %rdx
|
||||
; SSSE3-NEXT: movq %rcx, %rbp
|
||||
; SSSE3-NEXT: adcq %r10, %rbp
|
||||
; SSSE3-NEXT: setns %bl
|
||||
; SSSE3-NEXT: testq %rcx, %rcx
|
||||
; SSSE3-NEXT: setns %cl
|
||||
; SSSE3-NEXT: cmpb %bl, %cl
|
||||
; SSSE3-NEXT: setne %r8b
|
||||
; SSSE3-NEXT: testq %r10, %r10
|
||||
; SSSE3-NEXT: setns %bl
|
||||
; SSSE3-NEXT: cmpb %bl, %cl
|
||||
; SSSE3-NEXT: sete %cl
|
||||
; SSSE3-NEXT: andb %r8b, %cl
|
||||
; SSSE3-NEXT: movzbl %cl, %ecx
|
||||
; SSSE3-NEXT: negl %ecx
|
||||
; SSSE3-NEXT: movd %ecx, %xmm1
|
||||
; SSSE3-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSSE3-NEXT: seto %al
|
||||
; SSSE3-NEXT: movzbl %al, %eax
|
||||
; SSSE3-NEXT: negl %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm1
|
||||
; SSSE3-NEXT: movzbl %r8b, %eax
|
||||
; SSSE3-NEXT: negl %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: movq %rdx, 16(%r11)
|
||||
; SSSE3-NEXT: movq %rdi, (%r11)
|
||||
; SSSE3-NEXT: movq %rbp, 24(%r11)
|
||||
; SSSE3-NEXT: movq %rsi, 8(%r11)
|
||||
; SSSE3-NEXT: popq %rbx
|
||||
; SSSE3-NEXT: popq %rbp
|
||||
; SSSE3-NEXT: movq %rdx, 16(%r10)
|
||||
; SSSE3-NEXT: movq %rdi, (%r10)
|
||||
; SSSE3-NEXT: movq %rcx, 24(%r10)
|
||||
; SSSE3-NEXT: movq %rsi, 8(%r10)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: saddo_v2i128:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pushq %rbp
|
||||
; SSE41-NEXT: pushq %rbx
|
||||
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE41-NEXT: testq %r9, %r9
|
||||
; SSE41-NEXT: setns %al
|
||||
; SSE41-NEXT: testq %rsi, %rsi
|
||||
; SSE41-NEXT: setns %bl
|
||||
; SSE41-NEXT: cmpb %al, %bl
|
||||
; SSE41-NEXT: sete %bpl
|
||||
; SSE41-NEXT: addq %r8, %rdi
|
||||
; SSE41-NEXT: adcq %r9, %rsi
|
||||
; SSE41-NEXT: setns %al
|
||||
; SSE41-NEXT: cmpb %al, %bl
|
||||
; SSE41-NEXT: setne %al
|
||||
; SSE41-NEXT: andb %bpl, %al
|
||||
; SSE41-NEXT: seto %r8b
|
||||
; SSE41-NEXT: addq {{[0-9]+}}(%rsp), %rdx
|
||||
; SSE41-NEXT: movq %rcx, %rbp
|
||||
; SSE41-NEXT: adcq %r10, %rbp
|
||||
; SSE41-NEXT: setns %bl
|
||||
; SSE41-NEXT: testq %rcx, %rcx
|
||||
; SSE41-NEXT: setns %cl
|
||||
; SSE41-NEXT: cmpb %bl, %cl
|
||||
; SSE41-NEXT: setne %r8b
|
||||
; SSE41-NEXT: testq %r10, %r10
|
||||
; SSE41-NEXT: setns %bl
|
||||
; SSE41-NEXT: cmpb %bl, %cl
|
||||
; SSE41-NEXT: sete %cl
|
||||
; SSE41-NEXT: andb %r8b, %cl
|
||||
; SSE41-NEXT: movzbl %cl, %ecx
|
||||
; SSE41-NEXT: negl %ecx
|
||||
; SSE41-NEXT: movzbl %al, %eax
|
||||
; SSE41-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE41-NEXT: seto %al
|
||||
; SSE41-NEXT: movzbl %al, %r9d
|
||||
; SSE41-NEXT: negl %r9d
|
||||
; SSE41-NEXT: movzbl %r8b, %eax
|
||||
; SSE41-NEXT: negl %eax
|
||||
; SSE41-NEXT: movd %eax, %xmm0
|
||||
; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
|
||||
; SSE41-NEXT: movq %rdx, 16(%r11)
|
||||
; SSE41-NEXT: movq %rdi, (%r11)
|
||||
; SSE41-NEXT: movq %rbp, 24(%r11)
|
||||
; SSE41-NEXT: movq %rsi, 8(%r11)
|
||||
; SSE41-NEXT: popq %rbx
|
||||
; SSE41-NEXT: popq %rbp
|
||||
; SSE41-NEXT: pinsrd $1, %r9d, %xmm0
|
||||
; SSE41-NEXT: movq %rdx, 16(%r10)
|
||||
; SSE41-NEXT: movq %rdi, (%r10)
|
||||
; SSE41-NEXT: movq %rcx, 24(%r10)
|
||||
; SSE41-NEXT: movq %rsi, 8(%r10)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: saddo_v2i128:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: pushq %rbp
|
||||
; AVX1-NEXT: pushq %rbx
|
||||
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX1-NEXT: testq %r9, %r9
|
||||
; AVX1-NEXT: setns %al
|
||||
; AVX1-NEXT: testq %rsi, %rsi
|
||||
; AVX1-NEXT: setns %bl
|
||||
; AVX1-NEXT: cmpb %al, %bl
|
||||
; AVX1-NEXT: sete %bpl
|
||||
; AVX1-NEXT: addq %r8, %rdi
|
||||
; AVX1-NEXT: adcq %r9, %rsi
|
||||
; AVX1-NEXT: setns %al
|
||||
; AVX1-NEXT: cmpb %al, %bl
|
||||
; AVX1-NEXT: setne %al
|
||||
; AVX1-NEXT: andb %bpl, %al
|
||||
; AVX1-NEXT: seto %r8b
|
||||
; AVX1-NEXT: addq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX1-NEXT: movq %rcx, %rbp
|
||||
; AVX1-NEXT: adcq %r10, %rbp
|
||||
; AVX1-NEXT: setns %bl
|
||||
; AVX1-NEXT: testq %rcx, %rcx
|
||||
; AVX1-NEXT: setns %cl
|
||||
; AVX1-NEXT: cmpb %bl, %cl
|
||||
; AVX1-NEXT: setne %r8b
|
||||
; AVX1-NEXT: testq %r10, %r10
|
||||
; AVX1-NEXT: setns %bl
|
||||
; AVX1-NEXT: cmpb %bl, %cl
|
||||
; AVX1-NEXT: sete %cl
|
||||
; AVX1-NEXT: andb %r8b, %cl
|
||||
; AVX1-NEXT: movzbl %cl, %ecx
|
||||
; AVX1-NEXT: negl %ecx
|
||||
; AVX1-NEXT: movzbl %al, %eax
|
||||
; AVX1-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX1-NEXT: seto %al
|
||||
; AVX1-NEXT: movzbl %al, %r9d
|
||||
; AVX1-NEXT: negl %r9d
|
||||
; AVX1-NEXT: movzbl %r8b, %eax
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movq %rdx, 16(%r11)
|
||||
; AVX1-NEXT: movq %rdi, (%r11)
|
||||
; AVX1-NEXT: movq %rbp, 24(%r11)
|
||||
; AVX1-NEXT: movq %rsi, 8(%r11)
|
||||
; AVX1-NEXT: popq %rbx
|
||||
; AVX1-NEXT: popq %rbp
|
||||
; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movq %rdx, 16(%r10)
|
||||
; AVX1-NEXT: movq %rdi, (%r10)
|
||||
; AVX1-NEXT: movq %rcx, 24(%r10)
|
||||
; AVX1-NEXT: movq %rsi, 8(%r10)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: saddo_v2i128:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: pushq %rbp
|
||||
; AVX2-NEXT: pushq %rbx
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX2-NEXT: testq %r9, %r9
|
||||
; AVX2-NEXT: setns %al
|
||||
; AVX2-NEXT: testq %rsi, %rsi
|
||||
; AVX2-NEXT: setns %bl
|
||||
; AVX2-NEXT: cmpb %al, %bl
|
||||
; AVX2-NEXT: sete %bpl
|
||||
; AVX2-NEXT: addq %r8, %rdi
|
||||
; AVX2-NEXT: adcq %r9, %rsi
|
||||
; AVX2-NEXT: setns %al
|
||||
; AVX2-NEXT: cmpb %al, %bl
|
||||
; AVX2-NEXT: setne %al
|
||||
; AVX2-NEXT: andb %bpl, %al
|
||||
; AVX2-NEXT: seto %r8b
|
||||
; AVX2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX2-NEXT: movq %rcx, %rbp
|
||||
; AVX2-NEXT: adcq %r10, %rbp
|
||||
; AVX2-NEXT: setns %bl
|
||||
; AVX2-NEXT: testq %rcx, %rcx
|
||||
; AVX2-NEXT: setns %cl
|
||||
; AVX2-NEXT: cmpb %bl, %cl
|
||||
; AVX2-NEXT: setne %r8b
|
||||
; AVX2-NEXT: testq %r10, %r10
|
||||
; AVX2-NEXT: setns %bl
|
||||
; AVX2-NEXT: cmpb %bl, %cl
|
||||
; AVX2-NEXT: sete %cl
|
||||
; AVX2-NEXT: andb %r8b, %cl
|
||||
; AVX2-NEXT: movzbl %cl, %ecx
|
||||
; AVX2-NEXT: negl %ecx
|
||||
; AVX2-NEXT: movzbl %al, %eax
|
||||
; AVX2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX2-NEXT: seto %al
|
||||
; AVX2-NEXT: movzbl %al, %r9d
|
||||
; AVX2-NEXT: negl %r9d
|
||||
; AVX2-NEXT: movzbl %r8b, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movq %rdx, 16(%r11)
|
||||
; AVX2-NEXT: movq %rdi, (%r11)
|
||||
; AVX2-NEXT: movq %rbp, 24(%r11)
|
||||
; AVX2-NEXT: movq %rsi, 8(%r11)
|
||||
; AVX2-NEXT: popq %rbx
|
||||
; AVX2-NEXT: popq %rbp
|
||||
; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movq %rdx, 16(%r10)
|
||||
; AVX2-NEXT: movq %rdi, (%r10)
|
||||
; AVX2-NEXT: movq %rcx, 24(%r10)
|
||||
; AVX2-NEXT: movq %rsi, 8(%r10)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: saddo_v2i128:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: pushq %r14
|
||||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX512-NEXT: movq %rcx, %r14
|
||||
; AVX512-NEXT: adcq %r11, %r14
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: testq %rcx, %rcx
|
||||
; AVX512-NEXT: setns %cl
|
||||
; AVX512-NEXT: cmpb %bl, %cl
|
||||
; AVX512-NEXT: setne %bl
|
||||
; AVX512-NEXT: testq %r11, %r11
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: cmpb %al, %cl
|
||||
; AVX512-NEXT: sete %al
|
||||
; AVX512-NEXT: andb %bl, %al
|
||||
; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: testq %r9, %r9
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: testq %rsi, %rsi
|
||||
; AVX512-NEXT: setns %cl
|
||||
; AVX512-NEXT: cmpb %al, %cl
|
||||
; AVX512-NEXT: sete %al
|
||||
; AVX512-NEXT: addq %r8, %rdi
|
||||
; AVX512-NEXT: adcq %r9, %rsi
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: cmpb %bl, %cl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: andb %al, %cl
|
||||
; AVX512-NEXT: andl $1, %ecx
|
||||
; AVX512-NEXT: kmovw %ecx, %k1
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: andl $1, %eax
|
||||
; AVX512-NEXT: kmovw %eax, %k1
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: movq %rdx, 16(%r10)
|
||||
; AVX512-NEXT: movq %rdi, (%r10)
|
||||
; AVX512-NEXT: movq %r14, 24(%r10)
|
||||
; AVX512-NEXT: movq %rcx, 24(%r10)
|
||||
; AVX512-NEXT: movq %rsi, 8(%r10)
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: popq %r14
|
||||
; AVX512-NEXT: retq
|
||||
%t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
|
||||
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
|
||||
|
@ -1154,275 +1154,131 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||
define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
|
||||
; SSE2-LABEL: ssubo_v2i128:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pushq %rbp
|
||||
; SSE2-NEXT: pushq %rbx
|
||||
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE2-NEXT: testq %r9, %r9
|
||||
; SSE2-NEXT: setns %al
|
||||
; SSE2-NEXT: testq %rsi, %rsi
|
||||
; SSE2-NEXT: setns %bl
|
||||
; SSE2-NEXT: cmpb %al, %bl
|
||||
; SSE2-NEXT: setne %bpl
|
||||
; SSE2-NEXT: subq %r8, %rdi
|
||||
; SSE2-NEXT: sbbq %r9, %rsi
|
||||
; SSE2-NEXT: setns %al
|
||||
; SSE2-NEXT: cmpb %al, %bl
|
||||
; SSE2-NEXT: setne %al
|
||||
; SSE2-NEXT: andb %bpl, %al
|
||||
; SSE2-NEXT: seto %r8b
|
||||
; SSE2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
|
||||
; SSE2-NEXT: movq %rcx, %rbp
|
||||
; SSE2-NEXT: sbbq %r10, %rbp
|
||||
; SSE2-NEXT: setns %bl
|
||||
; SSE2-NEXT: testq %rcx, %rcx
|
||||
; SSE2-NEXT: setns %cl
|
||||
; SSE2-NEXT: cmpb %bl, %cl
|
||||
; SSE2-NEXT: setne %r8b
|
||||
; SSE2-NEXT: testq %r10, %r10
|
||||
; SSE2-NEXT: setns %bl
|
||||
; SSE2-NEXT: cmpb %bl, %cl
|
||||
; SSE2-NEXT: setne %cl
|
||||
; SSE2-NEXT: andb %r8b, %cl
|
||||
; SSE2-NEXT: movzbl %cl, %ecx
|
||||
; SSE2-NEXT: negl %ecx
|
||||
; SSE2-NEXT: movd %ecx, %xmm1
|
||||
; SSE2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE2-NEXT: seto %al
|
||||
; SSE2-NEXT: movzbl %al, %eax
|
||||
; SSE2-NEXT: negl %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: movzbl %r8b, %eax
|
||||
; SSE2-NEXT: negl %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: movq %rdx, 16(%r11)
|
||||
; SSE2-NEXT: movq %rdi, (%r11)
|
||||
; SSE2-NEXT: movq %rbp, 24(%r11)
|
||||
; SSE2-NEXT: movq %rsi, 8(%r11)
|
||||
; SSE2-NEXT: popq %rbx
|
||||
; SSE2-NEXT: popq %rbp
|
||||
; SSE2-NEXT: movq %rdx, 16(%r10)
|
||||
; SSE2-NEXT: movq %rdi, (%r10)
|
||||
; SSE2-NEXT: movq %rcx, 24(%r10)
|
||||
; SSE2-NEXT: movq %rsi, 8(%r10)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: ssubo_v2i128:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: pushq %rbp
|
||||
; SSSE3-NEXT: pushq %rbx
|
||||
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSSE3-NEXT: testq %r9, %r9
|
||||
; SSSE3-NEXT: setns %al
|
||||
; SSSE3-NEXT: testq %rsi, %rsi
|
||||
; SSSE3-NEXT: setns %bl
|
||||
; SSSE3-NEXT: cmpb %al, %bl
|
||||
; SSSE3-NEXT: setne %bpl
|
||||
; SSSE3-NEXT: subq %r8, %rdi
|
||||
; SSSE3-NEXT: sbbq %r9, %rsi
|
||||
; SSSE3-NEXT: setns %al
|
||||
; SSSE3-NEXT: cmpb %al, %bl
|
||||
; SSSE3-NEXT: setne %al
|
||||
; SSSE3-NEXT: andb %bpl, %al
|
||||
; SSSE3-NEXT: seto %r8b
|
||||
; SSSE3-NEXT: subq {{[0-9]+}}(%rsp), %rdx
|
||||
; SSSE3-NEXT: movq %rcx, %rbp
|
||||
; SSSE3-NEXT: sbbq %r10, %rbp
|
||||
; SSSE3-NEXT: setns %bl
|
||||
; SSSE3-NEXT: testq %rcx, %rcx
|
||||
; SSSE3-NEXT: setns %cl
|
||||
; SSSE3-NEXT: cmpb %bl, %cl
|
||||
; SSSE3-NEXT: setne %r8b
|
||||
; SSSE3-NEXT: testq %r10, %r10
|
||||
; SSSE3-NEXT: setns %bl
|
||||
; SSSE3-NEXT: cmpb %bl, %cl
|
||||
; SSSE3-NEXT: setne %cl
|
||||
; SSSE3-NEXT: andb %r8b, %cl
|
||||
; SSSE3-NEXT: movzbl %cl, %ecx
|
||||
; SSSE3-NEXT: negl %ecx
|
||||
; SSSE3-NEXT: movd %ecx, %xmm1
|
||||
; SSSE3-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSSE3-NEXT: seto %al
|
||||
; SSSE3-NEXT: movzbl %al, %eax
|
||||
; SSSE3-NEXT: negl %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm1
|
||||
; SSSE3-NEXT: movzbl %r8b, %eax
|
||||
; SSSE3-NEXT: negl %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: movq %rdx, 16(%r11)
|
||||
; SSSE3-NEXT: movq %rdi, (%r11)
|
||||
; SSSE3-NEXT: movq %rbp, 24(%r11)
|
||||
; SSSE3-NEXT: movq %rsi, 8(%r11)
|
||||
; SSSE3-NEXT: popq %rbx
|
||||
; SSSE3-NEXT: popq %rbp
|
||||
; SSSE3-NEXT: movq %rdx, 16(%r10)
|
||||
; SSSE3-NEXT: movq %rdi, (%r10)
|
||||
; SSSE3-NEXT: movq %rcx, 24(%r10)
|
||||
; SSSE3-NEXT: movq %rsi, 8(%r10)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: ssubo_v2i128:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pushq %rbp
|
||||
; SSE41-NEXT: pushq %rbx
|
||||
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; SSE41-NEXT: testq %r9, %r9
|
||||
; SSE41-NEXT: setns %al
|
||||
; SSE41-NEXT: testq %rsi, %rsi
|
||||
; SSE41-NEXT: setns %bl
|
||||
; SSE41-NEXT: cmpb %al, %bl
|
||||
; SSE41-NEXT: setne %bpl
|
||||
; SSE41-NEXT: subq %r8, %rdi
|
||||
; SSE41-NEXT: sbbq %r9, %rsi
|
||||
; SSE41-NEXT: setns %al
|
||||
; SSE41-NEXT: cmpb %al, %bl
|
||||
; SSE41-NEXT: setne %al
|
||||
; SSE41-NEXT: andb %bpl, %al
|
||||
; SSE41-NEXT: seto %r8b
|
||||
; SSE41-NEXT: subq {{[0-9]+}}(%rsp), %rdx
|
||||
; SSE41-NEXT: movq %rcx, %rbp
|
||||
; SSE41-NEXT: sbbq %r10, %rbp
|
||||
; SSE41-NEXT: setns %bl
|
||||
; SSE41-NEXT: testq %rcx, %rcx
|
||||
; SSE41-NEXT: setns %cl
|
||||
; SSE41-NEXT: cmpb %bl, %cl
|
||||
; SSE41-NEXT: setne %r8b
|
||||
; SSE41-NEXT: testq %r10, %r10
|
||||
; SSE41-NEXT: setns %bl
|
||||
; SSE41-NEXT: cmpb %bl, %cl
|
||||
; SSE41-NEXT: setne %cl
|
||||
; SSE41-NEXT: andb %r8b, %cl
|
||||
; SSE41-NEXT: movzbl %cl, %ecx
|
||||
; SSE41-NEXT: negl %ecx
|
||||
; SSE41-NEXT: movzbl %al, %eax
|
||||
; SSE41-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
||||
; SSE41-NEXT: seto %al
|
||||
; SSE41-NEXT: movzbl %al, %r9d
|
||||
; SSE41-NEXT: negl %r9d
|
||||
; SSE41-NEXT: movzbl %r8b, %eax
|
||||
; SSE41-NEXT: negl %eax
|
||||
; SSE41-NEXT: movd %eax, %xmm0
|
||||
; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
|
||||
; SSE41-NEXT: movq %rdx, 16(%r11)
|
||||
; SSE41-NEXT: movq %rdi, (%r11)
|
||||
; SSE41-NEXT: movq %rbp, 24(%r11)
|
||||
; SSE41-NEXT: movq %rsi, 8(%r11)
|
||||
; SSE41-NEXT: popq %rbx
|
||||
; SSE41-NEXT: popq %rbp
|
||||
; SSE41-NEXT: pinsrd $1, %r9d, %xmm0
|
||||
; SSE41-NEXT: movq %rdx, 16(%r10)
|
||||
; SSE41-NEXT: movq %rdi, (%r10)
|
||||
; SSE41-NEXT: movq %rcx, 24(%r10)
|
||||
; SSE41-NEXT: movq %rsi, 8(%r10)
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: ssubo_v2i128:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: pushq %rbp
|
||||
; AVX1-NEXT: pushq %rbx
|
||||
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX1-NEXT: testq %r9, %r9
|
||||
; AVX1-NEXT: setns %al
|
||||
; AVX1-NEXT: testq %rsi, %rsi
|
||||
; AVX1-NEXT: setns %bl
|
||||
; AVX1-NEXT: cmpb %al, %bl
|
||||
; AVX1-NEXT: setne %bpl
|
||||
; AVX1-NEXT: subq %r8, %rdi
|
||||
; AVX1-NEXT: sbbq %r9, %rsi
|
||||
; AVX1-NEXT: setns %al
|
||||
; AVX1-NEXT: cmpb %al, %bl
|
||||
; AVX1-NEXT: setne %al
|
||||
; AVX1-NEXT: andb %bpl, %al
|
||||
; AVX1-NEXT: seto %r8b
|
||||
; AVX1-NEXT: subq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX1-NEXT: movq %rcx, %rbp
|
||||
; AVX1-NEXT: sbbq %r10, %rbp
|
||||
; AVX1-NEXT: setns %bl
|
||||
; AVX1-NEXT: testq %rcx, %rcx
|
||||
; AVX1-NEXT: setns %cl
|
||||
; AVX1-NEXT: cmpb %bl, %cl
|
||||
; AVX1-NEXT: setne %r8b
|
||||
; AVX1-NEXT: testq %r10, %r10
|
||||
; AVX1-NEXT: setns %bl
|
||||
; AVX1-NEXT: cmpb %bl, %cl
|
||||
; AVX1-NEXT: setne %cl
|
||||
; AVX1-NEXT: andb %r8b, %cl
|
||||
; AVX1-NEXT: movzbl %cl, %ecx
|
||||
; AVX1-NEXT: negl %ecx
|
||||
; AVX1-NEXT: movzbl %al, %eax
|
||||
; AVX1-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX1-NEXT: seto %al
|
||||
; AVX1-NEXT: movzbl %al, %r9d
|
||||
; AVX1-NEXT: negl %r9d
|
||||
; AVX1-NEXT: movzbl %r8b, %eax
|
||||
; AVX1-NEXT: negl %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movq %rdx, 16(%r11)
|
||||
; AVX1-NEXT: movq %rdi, (%r11)
|
||||
; AVX1-NEXT: movq %rbp, 24(%r11)
|
||||
; AVX1-NEXT: movq %rsi, 8(%r11)
|
||||
; AVX1-NEXT: popq %rbx
|
||||
; AVX1-NEXT: popq %rbp
|
||||
; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
|
||||
; AVX1-NEXT: movq %rdx, 16(%r10)
|
||||
; AVX1-NEXT: movq %rdi, (%r10)
|
||||
; AVX1-NEXT: movq %rcx, 24(%r10)
|
||||
; AVX1-NEXT: movq %rsi, 8(%r10)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: ssubo_v2i128:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: pushq %rbp
|
||||
; AVX2-NEXT: pushq %rbx
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX2-NEXT: testq %r9, %r9
|
||||
; AVX2-NEXT: setns %al
|
||||
; AVX2-NEXT: testq %rsi, %rsi
|
||||
; AVX2-NEXT: setns %bl
|
||||
; AVX2-NEXT: cmpb %al, %bl
|
||||
; AVX2-NEXT: setne %bpl
|
||||
; AVX2-NEXT: subq %r8, %rdi
|
||||
; AVX2-NEXT: sbbq %r9, %rsi
|
||||
; AVX2-NEXT: setns %al
|
||||
; AVX2-NEXT: cmpb %al, %bl
|
||||
; AVX2-NEXT: setne %al
|
||||
; AVX2-NEXT: andb %bpl, %al
|
||||
; AVX2-NEXT: seto %r8b
|
||||
; AVX2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX2-NEXT: movq %rcx, %rbp
|
||||
; AVX2-NEXT: sbbq %r10, %rbp
|
||||
; AVX2-NEXT: setns %bl
|
||||
; AVX2-NEXT: testq %rcx, %rcx
|
||||
; AVX2-NEXT: setns %cl
|
||||
; AVX2-NEXT: cmpb %bl, %cl
|
||||
; AVX2-NEXT: setne %r8b
|
||||
; AVX2-NEXT: testq %r10, %r10
|
||||
; AVX2-NEXT: setns %bl
|
||||
; AVX2-NEXT: cmpb %bl, %cl
|
||||
; AVX2-NEXT: setne %cl
|
||||
; AVX2-NEXT: andb %r8b, %cl
|
||||
; AVX2-NEXT: movzbl %cl, %ecx
|
||||
; AVX2-NEXT: negl %ecx
|
||||
; AVX2-NEXT: movzbl %al, %eax
|
||||
; AVX2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX2-NEXT: seto %al
|
||||
; AVX2-NEXT: movzbl %al, %r9d
|
||||
; AVX2-NEXT: negl %r9d
|
||||
; AVX2-NEXT: movzbl %r8b, %eax
|
||||
; AVX2-NEXT: negl %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movq %rdx, 16(%r11)
|
||||
; AVX2-NEXT: movq %rdi, (%r11)
|
||||
; AVX2-NEXT: movq %rbp, 24(%r11)
|
||||
; AVX2-NEXT: movq %rsi, 8(%r11)
|
||||
; AVX2-NEXT: popq %rbx
|
||||
; AVX2-NEXT: popq %rbp
|
||||
; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
|
||||
; AVX2-NEXT: movq %rdx, 16(%r10)
|
||||
; AVX2-NEXT: movq %rdi, (%r10)
|
||||
; AVX2-NEXT: movq %rcx, 24(%r10)
|
||||
; AVX2-NEXT: movq %rsi, 8(%r10)
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: ssubo_v2i128:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: pushq %r14
|
||||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX512-NEXT: movq %rcx, %r14
|
||||
; AVX512-NEXT: sbbq %r11, %r14
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: testq %rcx, %rcx
|
||||
; AVX512-NEXT: setns %cl
|
||||
; AVX512-NEXT: cmpb %bl, %cl
|
||||
; AVX512-NEXT: setne %bl
|
||||
; AVX512-NEXT: testq %r11, %r11
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: cmpb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: andb %bl, %al
|
||||
; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: testq %r9, %r9
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: testq %rsi, %rsi
|
||||
; AVX512-NEXT: setns %cl
|
||||
; AVX512-NEXT: cmpb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: subq %r8, %rdi
|
||||
; AVX512-NEXT: sbbq %r9, %rsi
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: cmpb %bl, %cl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: andb %al, %cl
|
||||
; AVX512-NEXT: andl $1, %ecx
|
||||
; AVX512-NEXT: kmovw %ecx, %k1
|
||||
; AVX512-NEXT: seto %al
|
||||
; AVX512-NEXT: andl $1, %eax
|
||||
; AVX512-NEXT: kmovw %eax, %k1
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: movq %rdx, 16(%r10)
|
||||
; AVX512-NEXT: movq %rdi, (%r10)
|
||||
; AVX512-NEXT: movq %r14, 24(%r10)
|
||||
; AVX512-NEXT: movq %rcx, 24(%r10)
|
||||
; AVX512-NEXT: movq %rsi, 8(%r10)
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: popq %r14
|
||||
; AVX512-NEXT: retq
|
||||
%t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
|
||||
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
|
||||
|
@ -5,55 +5,35 @@
|
||||
define zeroext i1 @saddoi128(i128 %v1, i128 %v2, i128* %res) nounwind {
|
||||
; X64-LABEL: saddoi128:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: testq %rcx, %rcx
|
||||
; X64-NEXT: setns %r9b
|
||||
; X64-NEXT: testq %rsi, %rsi
|
||||
; X64-NEXT: setns %al
|
||||
; X64-NEXT: cmpb %r9b, %al
|
||||
; X64-NEXT: sete %r9b
|
||||
; X64-NEXT: addq %rdx, %rdi
|
||||
; X64-NEXT: adcq %rcx, %rsi
|
||||
; X64-NEXT: setns %cl
|
||||
; X64-NEXT: cmpb %cl, %al
|
||||
; X64-NEXT: setne %al
|
||||
; X64-NEXT: andb %r9b, %al
|
||||
; X64-NEXT: seto %al
|
||||
; X64-NEXT: movq %rdi, (%r8)
|
||||
; X64-NEXT: movq %rsi, 8(%r8)
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: saddoi128:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: setns %al
|
||||
; X86-NEXT: testl %ebx, %ebx
|
||||
; X86-NEXT: setns %ah
|
||||
; X86-NEXT: cmpb %al, %ah
|
||||
; X86-NEXT: sete %cl
|
||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: setns %al
|
||||
; X86-NEXT: cmpb %al, %ah
|
||||
; X86-NEXT: setne %al
|
||||
; X86-NEXT: andb %cl, %al
|
||||
; X86-NEXT: movl %esi, (%ebp)
|
||||
; X86-NEXT: movl %edi, 4(%ebp)
|
||||
; X86-NEXT: movl %edx, 8(%ebp)
|
||||
; X86-NEXT: movl %ebx, 12(%ebp)
|
||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: seto %al
|
||||
; X86-NEXT: movl %edi, (%ecx)
|
||||
; X86-NEXT: movl %ebx, 4(%ecx)
|
||||
; X86-NEXT: movl %esi, 8(%ecx)
|
||||
; X86-NEXT: movl %edx, 12(%ecx)
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
%t = call {i128, i1} @llvm.sadd.with.overflow.i128(i128 %v1, i128 %v2)
|
||||
%val = extractvalue {i128, i1} %t, 0
|
||||
@ -106,55 +86,35 @@ define zeroext i1 @uaddoi128(i128 %v1, i128 %v2, i128* %res) nounwind {
|
||||
define zeroext i1 @ssuboi128(i128 %v1, i128 %v2, i128* %res) nounwind {
|
||||
; X64-LABEL: ssuboi128:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: testq %rcx, %rcx
|
||||
; X64-NEXT: setns %r9b
|
||||
; X64-NEXT: testq %rsi, %rsi
|
||||
; X64-NEXT: setns %al
|
||||
; X64-NEXT: cmpb %r9b, %al
|
||||
; X64-NEXT: setne %r9b
|
||||
; X64-NEXT: subq %rdx, %rdi
|
||||
; X64-NEXT: sbbq %rcx, %rsi
|
||||
; X64-NEXT: setns %cl
|
||||
; X64-NEXT: cmpb %cl, %al
|
||||
; X64-NEXT: setne %al
|
||||
; X64-NEXT: andb %r9b, %al
|
||||
; X64-NEXT: seto %al
|
||||
; X64-NEXT: movq %rdi, (%r8)
|
||||
; X64-NEXT: movq %rsi, 8(%r8)
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: ssuboi128:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: setns %al
|
||||
; X86-NEXT: testl %ebx, %ebx
|
||||
; X86-NEXT: setns %ah
|
||||
; X86-NEXT: cmpb %al, %ah
|
||||
; X86-NEXT: setne %cl
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: setns %al
|
||||
; X86-NEXT: cmpb %al, %ah
|
||||
; X86-NEXT: setne %al
|
||||
; X86-NEXT: andb %cl, %al
|
||||
; X86-NEXT: movl %esi, (%ebp)
|
||||
; X86-NEXT: movl %edi, 4(%ebp)
|
||||
; X86-NEXT: movl %edx, 8(%ebp)
|
||||
; X86-NEXT: movl %ebx, 12(%ebp)
|
||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: seto %al
|
||||
; X86-NEXT: movl %edi, (%ecx)
|
||||
; X86-NEXT: movl %ebx, 4(%ecx)
|
||||
; X86-NEXT: movl %esi, 8(%ecx)
|
||||
; X86-NEXT: movl %edx, 12(%ecx)
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl
|
||||
%t = call {i128, i1} @llvm.ssub.with.overflow.i128(i128 %v1, i128 %v2)
|
||||
%val = extractvalue {i128, i1} %t, 0
|
||||
|
Loading…
Reference in New Issue
Block a user