
[X86][SelectionDAG] Add SADDO_CARRY and SSUBO_CARRY to support multipart signed add/sub overflow legalization.

This passes the existing X86 tests, but I'm not sure if it handles all the
type legalization cases it needs to.

Alternative to D89200

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D89222
Craig Topper 2020-10-12 23:18:22 -07:00
parent 2b696bcac5
commit 0120cd1285
17 changed files with 455 additions and 826 deletions

View File

@@ -283,6 +283,16 @@ enum NodeType {
ADDCARRY,
SUBCARRY,
/// Carry-using overflow-aware nodes for multiple precision addition and
/// subtraction. These nodes take three operands: The first two are normal lhs
/// and rhs to the add or sub, and the third is a boolean indicating if there
/// is an incoming carry. They produce two results: the normal result of the
/// add or sub, and a boolean that indicates if an overflow occurred (*not* a
/// flag, because it may be a store to memory, etc.). If the type of the
/// boolean is not i1 then the high bits conform to getBooleanContents.
SADDO_CARRY,
SSUBO_CARRY,
/// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
/// These nodes take two operands: the normal LHS and RHS to the add. They
/// produce two results: the normal result of the add, and a boolean that
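
As a sketch of how the new nodes compose (illustrative only; DL, DAG, and the expanded operand halves such as LHSLo/LHSHi are assumed names, not code from this patch): the low part of a multiword signed add uses a plain unsigned carry node, and the new node sits only at the most significant part, where signed overflow is decided.

// Hypothetical expansion of an i128 saddo into two i64 parts.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1);
// Low half: unsigned add producing a carry-out.
SDValue Lo = DAG.getNode(ISD::UADDO, DL, VTs, LHSLo, RHSLo);
// High half: consumes the carry and reports signed overflow.
SDValue Hi = DAG.getNode(ISD::SADDO_CARRY, DL, VTs,
                         LHSHi, RHSHi, Lo.getValue(1));
SDValue Ovf = Hi.getValue(1); // i1 overflow result for the whole i128 add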

View File

@@ -411,9 +411,11 @@ namespace {
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
@@ -1600,8 +1602,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -2836,6 +2840,28 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return SDValue();
}
SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (saddo_carry x, y, false) -> (saddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
      return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
  }

  return SDValue();
}
/**
* If we are facing some sort of diamond carry propagation pattern try to
* break it up to generate something like:
@@ -3517,6 +3543,21 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
return SDValue();
}
SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
      return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}
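
To see why the carry-in fold above is sound, here is a standalone model in plain C++ (not LLVM code; the helper names are invented for illustration): the signed overflow of a three-input add is the XOR of the overflows of its two steps, so with a false carry-in the second step is a no-op and the carry node degenerates to the plain overflow node.

#include <cstdint>

// Model of saddo: signed add reporting overflow.
static bool saddo32(int32_t A, int32_t B, int32_t &S) {
  return __builtin_add_overflow(A, B, &S);
}

// Model of saddo_carry: signed add of A, B and a carry-in bit.
static bool saddo_carry32(int32_t A, int32_t B, bool CarryIn, int32_t &S) {
  int32_t T;
  bool O1 = __builtin_add_overflow(A, B, &T);
  bool O2 = __builtin_add_overflow(T, (int32_t)CarryIn, &S);
  return O1 ^ O2; // overall signed overflow of A + B + CarryIn
}
// With CarryIn == false, O2 is false and S == T, so
// saddo_carry32(A, B, false, S) == saddo32(A, B, S): exactly the
// (saddo_carry x, y, false) -> (saddo x, y) fold.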
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {

View File

@@ -152,6 +152,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -1288,6 +1291,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
return SDValue(Res.getNode(), 0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
                                                       unsigned ResNo) {
  assert(ResNo == 1 && "Don't know how to promote other results yet.");
  return PromoteIntRes_Overflow(N);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
@@ -1470,6 +1479,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
@@ -2087,6 +2098,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
@@ -2710,6 +2724,26 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
                                                   SDValue &Lo, SDValue &Hi) {
  // Expand the subcomponents.
  SDValue LHSL, LHSH, RHSL, RHSH;
  SDLoc dl(N);
  GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
  GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));

  // We need to use an unsigned carry op for the lo part.
  unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
                                                        : ISD::SUBCARRY;
  Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
  Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });

  // Legalize the flag result - switch anything that used the old flag to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
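
As a standalone illustration of this expansion (ordinary C++ with invented names, not LLVM code), here is a 64-bit signed add-with-overflow built from 32-bit halves, mirroring the ADDCARRY/SADDO_CARRY pair above: the low half needs only an unsigned carry-out, and signed overflow is checked on the high half alone.

#include <cassert>
#include <cstdint>

static bool saddo64_via_halves(int64_t A, int64_t B, int64_t &Sum) {
  uint32_t ALo = (uint32_t)A, AHi = (uint32_t)((uint64_t)A >> 32);
  uint32_t BLo = (uint32_t)B, BHi = (uint32_t)((uint64_t)B >> 32);
  // Low half: plain unsigned add with carry-out (the ADDCARRY step).
  uint64_t Lo = (uint64_t)ALo + BLo;
  uint32_t Carry = (uint32_t)(Lo >> 32);
  // High half: add with carry-in; this is where SADDO_CARRY checks overflow.
  uint32_t Hi = AHi + BHi + Carry;
  Sum = (int64_t)(((uint64_t)Hi << 32) | (uint32_t)Lo);
  // Signed overflow: operand sign bits equal, result sign bit differs.
  return ((~(AHi ^ BHi) & (AHi ^ Hi)) >> 31) != 0;
}

int main() {
  int64_t S;
  assert(saddo64_via_halves(INT64_MAX, 1, S));      // wraps to INT64_MIN
  assert(!saddo64_via_halves(-5, 3, S) && S == -2); // no overflow
}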
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -3515,40 +3549,66 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
  SDValue RHS = Node->getOperand(1);
  SDLoc dl(Node);

  SDValue Ovf;

  unsigned CarryOp;
  switch(Node->getOpcode()) {
  default: llvm_unreachable("Node has unexpected Opcode");
  case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break;
  case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break;
  }

  bool HasCarryOp = TLI.isOperationLegalOrCustom(
      CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));

  if (HasCarryOp) {
    // Expand the subcomponents.
    SDValue LHSL, LHSH, RHSL, RHSH;
    GetExpandedInteger(LHS, LHSL, LHSH);
    GetExpandedInteger(RHS, RHSL, RHSH);
    SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));

    Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
                     ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL });
    Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });

    Ovf = Hi.getValue(1);
  } else {
    // Expand the result by simply replacing it with the equivalent
    // non-overflow-checking operation.
    SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
                              ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
                              LHS, RHS);
    SplitInteger(Sum, Lo, Hi);

    // Compute the overflow.
    //
    // LHSSign -> LHS >= 0
    // RHSSign -> RHS >= 0
    // SumSign -> Sum >= 0
    //
    // Add:
    // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
    // Sub:
    // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
    //
    EVT OType = Node->getValueType(1);
    SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

    SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
    SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
    SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
                                      Node->getOpcode() == ISD::SADDO ?
                                      ISD::SETEQ : ISD::SETNE);

    SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
    SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);

    Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
  }

  // Use the calculated overflow everywhere.
  ReplaceValueWith(SDValue(Node, 1), Ovf);
}
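
The comment block in the fallback branch states the sign rule in the abstract; here is a direct standalone check of it (plain C++, invented names) for both operations:

#include <cstdint>

// Add overflows iff the operand signs match and the sum's sign differs.
static bool saddo_by_signs(int32_t L, int32_t R) {
  int32_t Sum = (int32_t)((uint32_t)L + (uint32_t)R); // wrapping add
  return ((L >= 0) == (R >= 0)) && ((L >= 0) != (Sum >= 0));
}

// Sub overflows iff the operand signs differ and the result's sign
// differs from the LHS sign.
static bool ssubo_by_signs(int32_t L, int32_t R) {
  int32_t Diff = (int32_t)((uint32_t)L - (uint32_t)R); // wrapping sub
  return ((L >= 0) != (R >= 0)) && ((L >= 0) != (Diff >= 0));
}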
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,

View File

@@ -337,6 +337,7 @@ private:
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_VSCALE(SDNode *N);
@@ -429,6 +430,7 @@ private:
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi);

View File

@@ -293,6 +293,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
case ISD::ADDCARRY: return "addcarry";
case ISD::SADDO_CARRY: return "saddo_carry";
case ISD::SADDO: return "saddo";
case ISD::UADDO: return "uaddo";
case ISD::SSUBO: return "ssubo";
@@ -302,6 +303,7 @@
case ISD::SUBC: return "subc";
case ISD::SUBE: return "sube";
case ISD::SUBCARRY: return "subcarry";
case ISD::SSUBO_CARRY: return "ssubo_carry";
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";

View File

@@ -680,6 +680,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ADDCARRY, VT, Expand);
setOperationAction(ISD::SUBCARRY, VT, Expand);
setOperationAction(ISD::SETCCCARRY, VT, Expand);
setOperationAction(ISD::SADDO_CARRY, VT, Expand);
setOperationAction(ISD::SSUBO_CARRY, VT, Expand);
// ADDC/ADDE/SUBC/SUBE default to expand.
setOperationAction(ISD::ADDC, VT, Expand);

View File

@@ -1914,6 +1914,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ADDCARRY, VT, Custom);
setOperationAction(ISD::SUBCARRY, VT, Custom);
setOperationAction(ISD::SETCCCARRY, VT, Custom);
setOperationAction(ISD::SADDO_CARRY, VT, Custom);
setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
}
if (!Subtarget.is64Bit()) {
@@ -29241,6 +29243,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(0);
  unsigned Opc = Op.getOpcode();

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -29255,11 +29258,14 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
  Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
                      Carry, DAG.getAllOnesConstant(DL, CarryVT));

  bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY;
  SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
                            Op.getOperand(0), Op.getOperand(1),
                            Carry.getValue(1));

  bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY;
  SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
                           Sum.getValue(1), DL, DAG);

  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
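
Why only the condition code changes here: a single x86 adc or sbb sets both the carry flag and the overflow flag, so ADDCARRY/SUBCARRY and SADDO_CARRY/SSUBO_CARRY can share the same X86ISD::ADC/X86ISD::SBB node and differ only in which flag is read back (COND_B reads CF, COND_O reads OF). A small standalone model (plain C++, invented names) of the two flags for adc:

#include <cstdint>

struct AdcFlags { uint32_t Sum; bool CF; bool OF; };

static AdcFlags adc32(uint32_t A, uint32_t B, bool CarryIn) {
  uint64_t Wide = (uint64_t)A + B + CarryIn;
  uint32_t Sum = (uint32_t)Wide;
  AdcFlags R;
  R.Sum = Sum;
  R.CF = (Wide >> 32) != 0;                   // unsigned carry-out -> COND_B
  R.OF = ((~(A ^ B) & (A ^ Sum)) >> 31) != 0; // signed overflow -> COND_O
  return R;
}
// ADDCARRY's boolean result corresponds to CF; SADDO_CARRY's to OF.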
@@ -29784,6 +29790,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:

View File

@@ -139,26 +139,12 @@ define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_uaddo_saddo:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl %eax, %edx
; X32-NEXT: setb %bl
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setns %al
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: setns %cl
; X32-NEXT: cmpb %al, %cl
; X32-NEXT: sete %al
; X32-NEXT: testl %edx, %edx
; X32-NEXT: setns %dl
; X32-NEXT: cmpb %dl, %cl
; X32-NEXT: setne %dl
; X32-NEXT: andb %al, %dl
; X32-NEXT: orb %bl, %dl
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: setb %al
; X32-NEXT: seto %dl
; X32-NEXT: orb %al, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: knownbits_uaddo_saddo:
@@ -191,20 +177,10 @@ define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: knownbits_usubo_ssubo:
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: cmpl %eax, %ecx
; X32-NEXT: setb %dh
; X32-NEXT: setns %dl
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: setns %cl
; X32-NEXT: cmpb %dl, %cl
; X32-NEXT: setne %ch
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setns %al
; X32-NEXT: cmpb %al, %cl
; X32-NEXT: setne %dl
; X32-NEXT: andb %ch, %dl
; X32-NEXT: orb %dh, %dl
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X32-NEXT: setb %al
; X32-NEXT: seto %dl
; X32-NEXT: orb %al, %dl
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;

View File

@@ -42,38 +42,25 @@ define i32 @func(i32 %x, i32 %y) nounwind {
define i64 @func2(i64 %x, i64 %y) nounwind {
; X86-LABEL: func2:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: adcl %esi, %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
; X86-NEXT: seto %bl
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testl %ebp, %ebp
; X86-NEXT: setns %cl
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: setns %bl
; X86-NEXT: cmpb %cl, %bl
; X86-NEXT: setne %cl
; X86-NEXT: testb %bl, %bl
; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %esi, %esi
; X86-NEXT: setns %ch
; X86-NEXT: cmpb %ch, %bl
; X86-NEXT: sete %ch
; X86-NEXT: testb %cl, %ch
; X86-NEXT: cmovel %ebp, %edx
; X86-NEXT: cmovel %edi, %eax
; X86-NEXT: setns %dl
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: testb %bl, %bl
; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: func2:

View File

@@ -44,38 +44,25 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-LABEL: func64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: adcl %esi, %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
; X86-NEXT: seto %bl
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testl %ebp, %ebp
; X86-NEXT: setns %cl
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: setns %bl
; X86-NEXT: cmpb %cl, %bl
; X86-NEXT: setne %cl
; X86-NEXT: testb %bl, %bl
; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %esi, %esi
; X86-NEXT: setns %ch
; X86-NEXT: cmpb %ch, %bl
; X86-NEXT: sete %ch
; X86-NEXT: testb %cl, %ch
; X86-NEXT: cmovel %ebp, %edx
; X86-NEXT: cmovel %edi, %eax
; X86-NEXT: setns %dl
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: testb %bl, %bl
; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: func64:

View File

@@ -1940,124 +1940,78 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-LABEL: v2i128:
; SSE: # %bb.0:
; SSE-NEXT: pushq %r15
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %r13
; SSE-NEXT: pushq %r12
; SSE-NEXT: pushq %rbx
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: movq %r8, %r13
; SSE-NEXT: adcq %r14, %r13
; SSE-NEXT: movq %r13, %r10
; SSE-NEXT: sarq $63, %r10
; SSE-NEXT: xorl %edi, %edi
; SSE-NEXT: testq %r13, %r13
; SSE-NEXT: setns %dil
; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT: leaq (%rdi,%r12), %r15
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: seto %r10b
; SSE-NEXT: movq %r8, %rbx
; SSE-NEXT: sarq $63, %rbx
; SSE-NEXT: testb %r10b, %r10b
; SSE-NEXT: cmoveq %rcx, %rbx
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: testq %r8, %r8
; SSE-NEXT: setns %r8b
; SSE-NEXT: cmpb %dil, %r8b
; SSE-NEXT: setne %dil
; SSE-NEXT: testq %r14, %r14
; SSE-NEXT: setns %bl
; SSE-NEXT: cmpb %bl, %r8b
; SSE-NEXT: sete %bl
; SSE-NEXT: testb %dil, %bl
; SSE-NEXT: cmoveq %r13, %r15
; SSE-NEXT: cmoveq %rcx, %r10
; SSE-NEXT: setns %cl
; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT: addq %r11, %rcx
; SSE-NEXT: testb %r10b, %r10b
; SSE-NEXT: cmoveq %r8, %rcx
; SSE-NEXT: addq %r9, %rsi
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: seto %r8b
; SSE-NEXT: movq %rdx, %rdi
; SSE-NEXT: adcq %r11, %rdi
; SSE-NEXT: setns %bl
; SSE-NEXT: movzbl %bl, %ebx
; SSE-NEXT: addq %rbx, %r12
; SSE-NEXT: movq %rdi, %rcx
; SSE-NEXT: sarq $63, %rcx
; SSE-NEXT: testq %r11, %r11
; SSE-NEXT: setns %r8b
; SSE-NEXT: sarq $63, %rdi
; SSE-NEXT: testb %r8b, %r8b
; SSE-NEXT: cmoveq %rsi, %rdi
; SSE-NEXT: xorl %esi, %esi
; SSE-NEXT: testq %rdx, %rdx
; SSE-NEXT: setns %dl
; SSE-NEXT: cmpb %r8b, %dl
; SSE-NEXT: sete %r8b
; SSE-NEXT: cmpb %bl, %dl
; SSE-NEXT: setne %dl
; SSE-NEXT: testb %dl, %r8b
; SSE-NEXT: cmoveq %rsi, %rcx
; SSE-NEXT: cmoveq %rdi, %r12
; SSE-NEXT: movq %r15, 24(%rax)
; SSE-NEXT: movq %r10, 16(%rax)
; SSE-NEXT: movq %r12, 8(%rax)
; SSE-NEXT: movq %rcx, (%rax)
; SSE-NEXT: setns %sil
; SSE-NEXT: addq %r11, %rsi
; SSE-NEXT: testb %r8b, %r8b
; SSE-NEXT: cmoveq %rdx, %rsi
; SSE-NEXT: movq %rbx, 16(%rax)
; SSE-NEXT: movq %rdi, (%rax)
; SSE-NEXT: movq %rcx, 24(%rax)
; SSE-NEXT: movq %rsi, 8(%rax)
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r12
; SSE-NEXT: popq %r13
; SSE-NEXT: popq %r14
; SSE-NEXT: popq %r15
; SSE-NEXT: retq
;
; AVX-LABEL: v2i128:
; AVX: # %bb.0:
; AVX-NEXT: pushq %r15
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %r13
; AVX-NEXT: pushq %r12
; AVX-NEXT: pushq %rbx
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: movq %r8, %r13
; AVX-NEXT: adcq %r14, %r13
; AVX-NEXT: movq %r13, %r10
; AVX-NEXT: sarq $63, %r10
; AVX-NEXT: xorl %edi, %edi
; AVX-NEXT: testq %r13, %r13
; AVX-NEXT: setns %dil
; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
; AVX-NEXT: leaq (%rdi,%r12), %r15
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: seto %r10b
; AVX-NEXT: movq %r8, %rbx
; AVX-NEXT: sarq $63, %rbx
; AVX-NEXT: testb %r10b, %r10b
; AVX-NEXT: cmoveq %rcx, %rbx
; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: testq %r8, %r8
; AVX-NEXT: setns %r8b
; AVX-NEXT: cmpb %dil, %r8b
; AVX-NEXT: setne %dil
; AVX-NEXT: testq %r14, %r14
; AVX-NEXT: setns %bl
; AVX-NEXT: cmpb %bl, %r8b
; AVX-NEXT: sete %bl
; AVX-NEXT: testb %dil, %bl
; AVX-NEXT: cmoveq %r13, %r15
; AVX-NEXT: cmoveq %rcx, %r10
; AVX-NEXT: setns %cl
; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
; AVX-NEXT: addq %r11, %rcx
; AVX-NEXT: testb %r10b, %r10b
; AVX-NEXT: cmoveq %r8, %rcx
; AVX-NEXT: addq %r9, %rsi
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: seto %r8b
; AVX-NEXT: movq %rdx, %rdi
; AVX-NEXT: adcq %r11, %rdi
; AVX-NEXT: setns %bl
; AVX-NEXT: movzbl %bl, %ebx
; AVX-NEXT: addq %rbx, %r12
; AVX-NEXT: movq %rdi, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: testq %r11, %r11
; AVX-NEXT: setns %r8b
; AVX-NEXT: sarq $63, %rdi
; AVX-NEXT: testb %r8b, %r8b
; AVX-NEXT: cmoveq %rsi, %rdi
; AVX-NEXT: xorl %esi, %esi
; AVX-NEXT: testq %rdx, %rdx
; AVX-NEXT: setns %dl
; AVX-NEXT: cmpb %r8b, %dl
; AVX-NEXT: sete %r8b
; AVX-NEXT: cmpb %bl, %dl
; AVX-NEXT: setne %dl
; AVX-NEXT: testb %dl, %r8b
; AVX-NEXT: cmoveq %rsi, %rcx
; AVX-NEXT: cmoveq %rdi, %r12
; AVX-NEXT: movq %r15, 24(%rax)
; AVX-NEXT: movq %r10, 16(%rax)
; AVX-NEXT: movq %r12, 8(%rax)
; AVX-NEXT: movq %rcx, (%rax)
; AVX-NEXT: setns %sil
; AVX-NEXT: addq %r11, %rsi
; AVX-NEXT: testb %r8b, %r8b
; AVX-NEXT: cmoveq %rdx, %rsi
; AVX-NEXT: movq %rbx, 16(%rax)
; AVX-NEXT: movq %rdi, (%rax)
; AVX-NEXT: movq %rcx, 24(%rax)
; AVX-NEXT: movq %rsi, 8(%rax)
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %r12
; AVX-NEXT: popq %r13
; AVX-NEXT: popq %r14
; AVX-NEXT: popq %r15
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
ret <2 x i128> %z

View File

@@ -38,38 +38,25 @@ define i32 @func(i32 %x, i32 %y) nounwind {
define i64 @func2(i64 %x, i64 %y) nounwind {
; X86-LABEL: func2:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: sbbl %esi, %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: seto %bl
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testl %ebp, %ebp
; X86-NEXT: setns %cl
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: setns %bl
; X86-NEXT: cmpb %cl, %bl
; X86-NEXT: setne %cl
; X86-NEXT: testb %bl, %bl
; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %esi, %esi
; X86-NEXT: setns %ch
; X86-NEXT: cmpb %ch, %bl
; X86-NEXT: setne %ch
; X86-NEXT: testb %cl, %ch
; X86-NEXT: cmovel %ebp, %edx
; X86-NEXT: cmovel %edi, %eax
; X86-NEXT: setns %dl
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: testb %bl, %bl
; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: func2:

View File

@@ -40,38 +40,25 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-LABEL: func64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: sbbl %esi, %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: seto %bl
; X86-NEXT: movl %esi, %eax
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testl %ebp, %ebp
; X86-NEXT: setns %cl
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: setns %bl
; X86-NEXT: cmpb %cl, %bl
; X86-NEXT: setne %cl
; X86-NEXT: testb %bl, %bl
; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: testl %esi, %esi
; X86-NEXT: setns %ch
; X86-NEXT: cmpb %ch, %bl
; X86-NEXT: setne %ch
; X86-NEXT: testb %cl, %ch
; X86-NEXT: cmovel %ebp, %edx
; X86-NEXT: cmovel %edi, %eax
; X86-NEXT: setns %dl
; X86-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: testb %bl, %bl
; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: func64:

View File

@@ -2145,124 +2145,78 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-LABEL: v2i128:
; SSE: # %bb.0:
; SSE-NEXT: pushq %r15
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %r13
; SSE-NEXT: pushq %r12
; SSE-NEXT: pushq %rbx
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r14
; SSE-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: movq %r8, %r13
; SSE-NEXT: sbbq %r14, %r13
; SSE-NEXT: movq %r13, %r10
; SSE-NEXT: sarq $63, %r10
; SSE-NEXT: xorl %edi, %edi
; SSE-NEXT: testq %r13, %r13
; SSE-NEXT: setns %dil
; SSE-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT: leaq (%rdi,%r12), %r15
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: seto %r10b
; SSE-NEXT: movq %r8, %rbx
; SSE-NEXT: sarq $63, %rbx
; SSE-NEXT: testb %r10b, %r10b
; SSE-NEXT: cmoveq %rcx, %rbx
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: testq %r8, %r8
; SSE-NEXT: setns %r8b
; SSE-NEXT: cmpb %dil, %r8b
; SSE-NEXT: setne %dil
; SSE-NEXT: testq %r14, %r14
; SSE-NEXT: setns %bl
; SSE-NEXT: cmpb %bl, %r8b
; SSE-NEXT: setne %bl
; SSE-NEXT: testb %dil, %bl
; SSE-NEXT: cmoveq %r13, %r15
; SSE-NEXT: cmoveq %rcx, %r10
; SSE-NEXT: setns %cl
; SSE-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT: addq %r11, %rcx
; SSE-NEXT: testb %r10b, %r10b
; SSE-NEXT: cmoveq %r8, %rcx
; SSE-NEXT: subq %r9, %rsi
; SSE-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: seto %r8b
; SSE-NEXT: movq %rdx, %rdi
; SSE-NEXT: sbbq %r11, %rdi
; SSE-NEXT: setns %bl
; SSE-NEXT: movzbl %bl, %ebx
; SSE-NEXT: addq %rbx, %r12
; SSE-NEXT: movq %rdi, %rcx
; SSE-NEXT: sarq $63, %rcx
; SSE-NEXT: testq %r11, %r11
; SSE-NEXT: setns %r8b
; SSE-NEXT: sarq $63, %rdi
; SSE-NEXT: testb %r8b, %r8b
; SSE-NEXT: cmoveq %rsi, %rdi
; SSE-NEXT: xorl %esi, %esi
; SSE-NEXT: testq %rdx, %rdx
; SSE-NEXT: setns %dl
; SSE-NEXT: cmpb %r8b, %dl
; SSE-NEXT: setne %r8b
; SSE-NEXT: cmpb %bl, %dl
; SSE-NEXT: setne %dl
; SSE-NEXT: testb %dl, %r8b
; SSE-NEXT: cmoveq %rsi, %rcx
; SSE-NEXT: cmoveq %rdi, %r12
; SSE-NEXT: movq %r15, 24(%rax)
; SSE-NEXT: movq %r10, 16(%rax)
; SSE-NEXT: movq %r12, 8(%rax)
; SSE-NEXT: movq %rcx, (%rax)
; SSE-NEXT: setns %sil
; SSE-NEXT: addq %r11, %rsi
; SSE-NEXT: testb %r8b, %r8b
; SSE-NEXT: cmoveq %rdx, %rsi
; SSE-NEXT: movq %rbx, 16(%rax)
; SSE-NEXT: movq %rdi, (%rax)
; SSE-NEXT: movq %rcx, 24(%rax)
; SSE-NEXT: movq %rsi, 8(%rax)
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r12
; SSE-NEXT: popq %r13
; SSE-NEXT: popq %r14
; SSE-NEXT: popq %r15
; SSE-NEXT: retq
;
; AVX-LABEL: v2i128:
; AVX: # %bb.0:
; AVX-NEXT: pushq %r15
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %r13
; AVX-NEXT: pushq %r12
; AVX-NEXT: pushq %rbx
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX-NEXT: movq {{[0-9]+}}(%rsp), %r14
; AVX-NEXT: subq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: movq %r8, %r13
; AVX-NEXT: sbbq %r14, %r13
; AVX-NEXT: movq %r13, %r10
; AVX-NEXT: sarq $63, %r10
; AVX-NEXT: xorl %edi, %edi
; AVX-NEXT: testq %r13, %r13
; AVX-NEXT: setns %dil
; AVX-NEXT: movabsq $9223372036854775807, %r12 # imm = 0x7FFFFFFFFFFFFFFF
; AVX-NEXT: leaq (%rdi,%r12), %r15
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: seto %r10b
; AVX-NEXT: movq %r8, %rbx
; AVX-NEXT: sarq $63, %rbx
; AVX-NEXT: testb %r10b, %r10b
; AVX-NEXT: cmoveq %rcx, %rbx
; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: testq %r8, %r8
; AVX-NEXT: setns %r8b
; AVX-NEXT: cmpb %dil, %r8b
; AVX-NEXT: setne %dil
; AVX-NEXT: testq %r14, %r14
; AVX-NEXT: setns %bl
; AVX-NEXT: cmpb %bl, %r8b
; AVX-NEXT: setne %bl
; AVX-NEXT: testb %dil, %bl
; AVX-NEXT: cmoveq %r13, %r15
; AVX-NEXT: cmoveq %rcx, %r10
; AVX-NEXT: setns %cl
; AVX-NEXT: movabsq $9223372036854775807, %r11 # imm = 0x7FFFFFFFFFFFFFFF
; AVX-NEXT: addq %r11, %rcx
; AVX-NEXT: testb %r10b, %r10b
; AVX-NEXT: cmoveq %r8, %rcx
; AVX-NEXT: subq %r9, %rsi
; AVX-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: seto %r8b
; AVX-NEXT: movq %rdx, %rdi
; AVX-NEXT: sbbq %r11, %rdi
; AVX-NEXT: setns %bl
; AVX-NEXT: movzbl %bl, %ebx
; AVX-NEXT: addq %rbx, %r12
; AVX-NEXT: movq %rdi, %rcx
; AVX-NEXT: sarq $63, %rcx
; AVX-NEXT: testq %r11, %r11
; AVX-NEXT: setns %r8b
; AVX-NEXT: sarq $63, %rdi
; AVX-NEXT: testb %r8b, %r8b
; AVX-NEXT: cmoveq %rsi, %rdi
; AVX-NEXT: xorl %esi, %esi
; AVX-NEXT: testq %rdx, %rdx
; AVX-NEXT: setns %dl
; AVX-NEXT: cmpb %r8b, %dl
; AVX-NEXT: setne %r8b
; AVX-NEXT: cmpb %bl, %dl
; AVX-NEXT: setne %dl
; AVX-NEXT: testb %dl, %r8b
; AVX-NEXT: cmoveq %rsi, %rcx
; AVX-NEXT: cmoveq %rdi, %r12
; AVX-NEXT: movq %r15, 24(%rax)
; AVX-NEXT: movq %r10, 16(%rax)
; AVX-NEXT: movq %r12, 8(%rax)
; AVX-NEXT: movq %rcx, (%rax)
; AVX-NEXT: setns %sil
; AVX-NEXT: addq %r11, %rsi
; AVX-NEXT: testb %r8b, %r8b
; AVX-NEXT: cmoveq %rdx, %rsi
; AVX-NEXT: movq %rbx, 16(%rax)
; AVX-NEXT: movq %rdi, (%rax)
; AVX-NEXT: movq %rcx, 24(%rax)
; AVX-NEXT: movq %rsi, 8(%rax)
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %r12
; AVX-NEXT: popq %r13
; AVX-NEXT: popq %r14
; AVX-NEXT: popq %r15
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
ret <2 x i128> %z

View File

@@ -1145,275 +1145,131 @@ define <4 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind
define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
; SSE2-LABEL: saddo_v2i128:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbp
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
; SSE2-NEXT: testq %r9, %r9
; SSE2-NEXT: setns %al
; SSE2-NEXT: testq %rsi, %rsi
; SSE2-NEXT: setns %bl
; SSE2-NEXT: cmpb %al, %bl
; SSE2-NEXT: sete %bpl
; SSE2-NEXT: addq %r8, %rdi
; SSE2-NEXT: adcq %r9, %rsi
; SSE2-NEXT: setns %al
; SSE2-NEXT: cmpb %al, %bl
; SSE2-NEXT: setne %al
; SSE2-NEXT: andb %bpl, %al
; SSE2-NEXT: seto %r8b
; SSE2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; SSE2-NEXT: movq %rcx, %rbp
; SSE2-NEXT: adcq %r10, %rbp
; SSE2-NEXT: setns %bl
; SSE2-NEXT: testq %rcx, %rcx
; SSE2-NEXT: setns %cl
; SSE2-NEXT: cmpb %bl, %cl
; SSE2-NEXT: setne %r8b
; SSE2-NEXT: testq %r10, %r10
; SSE2-NEXT: setns %bl
; SSE2-NEXT: cmpb %bl, %cl
; SSE2-NEXT: sete %cl
; SSE2-NEXT: andb %r8b, %cl
; SSE2-NEXT: movzbl %cl, %ecx
; SSE2-NEXT: negl %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; SSE2-NEXT: seto %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movzbl %r8b, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movq %rdx, 16(%r11)
; SSE2-NEXT: movq %rdi, (%r11)
; SSE2-NEXT: movq %rbp, 24(%r11)
; SSE2-NEXT: movq %rsi, 8(%r11)
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %rbp
; SSE2-NEXT: movq %rdx, 16(%r10)
; SSE2-NEXT: movq %rdi, (%r10)
; SSE2-NEXT: movq %rcx, 24(%r10)
; SSE2-NEXT: movq %rsi, 8(%r10)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: saddo_v2i128:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pushq %rbp
; SSSE3-NEXT: pushq %rbx
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
; SSSE3-NEXT: testq %r9, %r9
; SSSE3-NEXT: setns %al
; SSSE3-NEXT: testq %rsi, %rsi
; SSSE3-NEXT: setns %bl
; SSSE3-NEXT: cmpb %al, %bl
; SSSE3-NEXT: sete %bpl
; SSSE3-NEXT: addq %r8, %rdi
; SSSE3-NEXT: adcq %r9, %rsi
; SSSE3-NEXT: setns %al
; SSSE3-NEXT: cmpb %al, %bl
; SSSE3-NEXT: setne %al
; SSSE3-NEXT: andb %bpl, %al
; SSSE3-NEXT: seto %r8b
; SSSE3-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; SSSE3-NEXT: movq %rcx, %rbp
; SSSE3-NEXT: adcq %r10, %rbp
; SSSE3-NEXT: setns %bl
; SSSE3-NEXT: testq %rcx, %rcx
; SSSE3-NEXT: setns %cl
; SSSE3-NEXT: cmpb %bl, %cl
; SSSE3-NEXT: setne %r8b
; SSSE3-NEXT: testq %r10, %r10
; SSSE3-NEXT: setns %bl
; SSSE3-NEXT: cmpb %bl, %cl
; SSSE3-NEXT: sete %cl
; SSSE3-NEXT: andb %r8b, %cl
; SSSE3-NEXT: movzbl %cl, %ecx
; SSSE3-NEXT: negl %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; SSSE3-NEXT: seto %al
; SSSE3-NEXT: movzbl %al, %eax
; SSSE3-NEXT: negl %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movzbl %r8b, %eax
; SSSE3-NEXT: negl %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movq %rdx, 16(%r11)
; SSSE3-NEXT: movq %rdi, (%r11)
; SSSE3-NEXT: movq %rbp, 24(%r11)
; SSSE3-NEXT: movq %rsi, 8(%r11)
; SSSE3-NEXT: popq %rbx
; SSSE3-NEXT: popq %rbp
; SSSE3-NEXT: movq %rdx, 16(%r10)
; SSSE3-NEXT: movq %rdi, (%r10)
; SSSE3-NEXT: movq %rcx, 24(%r10)
; SSSE3-NEXT: movq %rsi, 8(%r10)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: saddo_v2i128:
; SSE41: # %bb.0:
; SSE41-NEXT: pushq %rbp
; SSE41-NEXT: pushq %rbx
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
; SSE41-NEXT: testq %r9, %r9
; SSE41-NEXT: setns %al
; SSE41-NEXT: testq %rsi, %rsi
; SSE41-NEXT: setns %bl
; SSE41-NEXT: cmpb %al, %bl
; SSE41-NEXT: sete %bpl
; SSE41-NEXT: addq %r8, %rdi
; SSE41-NEXT: adcq %r9, %rsi
; SSE41-NEXT: setns %al
; SSE41-NEXT: cmpb %al, %bl
; SSE41-NEXT: setne %al
; SSE41-NEXT: andb %bpl, %al
; SSE41-NEXT: seto %r8b
; SSE41-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; SSE41-NEXT: movq %rcx, %rbp
; SSE41-NEXT: adcq %r10, %rbp
; SSE41-NEXT: setns %bl
; SSE41-NEXT: testq %rcx, %rcx
; SSE41-NEXT: setns %cl
; SSE41-NEXT: cmpb %bl, %cl
; SSE41-NEXT: setne %r8b
; SSE41-NEXT: testq %r10, %r10
; SSE41-NEXT: setns %bl
; SSE41-NEXT: cmpb %bl, %cl
; SSE41-NEXT: sete %cl
; SSE41-NEXT: andb %r8b, %cl
; SSE41-NEXT: movzbl %cl, %ecx
; SSE41-NEXT: negl %ecx
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; SSE41-NEXT: seto %al
; SSE41-NEXT: movzbl %al, %r9d
; SSE41-NEXT: negl %r9d
; SSE41-NEXT: movzbl %r8b, %eax
; SSE41-NEXT: negl %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
; SSE41-NEXT: movq %rdx, 16(%r11)
; SSE41-NEXT: movq %rdi, (%r11)
; SSE41-NEXT: movq %rbp, 24(%r11)
; SSE41-NEXT: movq %rsi, 8(%r11)
; SSE41-NEXT: popq %rbx
; SSE41-NEXT: popq %rbp
; SSE41-NEXT: pinsrd $1, %r9d, %xmm0
; SSE41-NEXT: movq %rdx, 16(%r10)
; SSE41-NEXT: movq %rdi, (%r10)
; SSE41-NEXT: movq %rcx, 24(%r10)
; SSE41-NEXT: movq %rsi, 8(%r10)
; SSE41-NEXT: retq
;
; AVX1-LABEL: saddo_v2i128:
; AVX1: # %bb.0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX1-NEXT: testq %r9, %r9
; AVX1-NEXT: setns %al
; AVX1-NEXT: testq %rsi, %rsi
; AVX1-NEXT: setns %bl
; AVX1-NEXT: cmpb %al, %bl
; AVX1-NEXT: sete %bpl
; AVX1-NEXT: addq %r8, %rdi
; AVX1-NEXT: adcq %r9, %rsi
; AVX1-NEXT: setns %al
; AVX1-NEXT: cmpb %al, %bl
; AVX1-NEXT: setne %al
; AVX1-NEXT: andb %bpl, %al
; AVX1-NEXT: seto %r8b
; AVX1-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX1-NEXT: movq %rcx, %rbp
; AVX1-NEXT: adcq %r10, %rbp
; AVX1-NEXT: setns %bl
; AVX1-NEXT: testq %rcx, %rcx
; AVX1-NEXT: setns %cl
; AVX1-NEXT: cmpb %bl, %cl
; AVX1-NEXT: setne %r8b
; AVX1-NEXT: testq %r10, %r10
; AVX1-NEXT: setns %bl
; AVX1-NEXT: cmpb %bl, %cl
; AVX1-NEXT: sete %cl
; AVX1-NEXT: andb %r8b, %cl
; AVX1-NEXT: movzbl %cl, %ecx
; AVX1-NEXT: negl %ecx
; AVX1-NEXT: movzbl %al, %eax
; AVX1-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; AVX1-NEXT: seto %al
; AVX1-NEXT: movzbl %al, %r9d
; AVX1-NEXT: negl %r9d
; AVX1-NEXT: movzbl %r8b, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, 16(%r11)
; AVX1-NEXT: movq %rdi, (%r11)
; AVX1-NEXT: movq %rbp, 24(%r11)
; AVX1-NEXT: movq %rsi, 8(%r11)
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, 16(%r10)
; AVX1-NEXT: movq %rdi, (%r10)
; AVX1-NEXT: movq %rcx, 24(%r10)
; AVX1-NEXT: movq %rsi, 8(%r10)
; AVX1-NEXT: retq
;
; AVX2-LABEL: saddo_v2i128:
; AVX2: # %bb.0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX2-NEXT: testq %r9, %r9
; AVX2-NEXT: setns %al
; AVX2-NEXT: testq %rsi, %rsi
; AVX2-NEXT: setns %bl
; AVX2-NEXT: cmpb %al, %bl
; AVX2-NEXT: sete %bpl
; AVX2-NEXT: addq %r8, %rdi
; AVX2-NEXT: adcq %r9, %rsi
; AVX2-NEXT: setns %al
; AVX2-NEXT: cmpb %al, %bl
; AVX2-NEXT: setne %al
; AVX2-NEXT: andb %bpl, %al
; AVX2-NEXT: seto %r8b
; AVX2-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX2-NEXT: movq %rcx, %rbp
; AVX2-NEXT: adcq %r10, %rbp
; AVX2-NEXT: setns %bl
; AVX2-NEXT: testq %rcx, %rcx
; AVX2-NEXT: setns %cl
; AVX2-NEXT: cmpb %bl, %cl
; AVX2-NEXT: setne %r8b
; AVX2-NEXT: testq %r10, %r10
; AVX2-NEXT: setns %bl
; AVX2-NEXT: cmpb %bl, %cl
; AVX2-NEXT: sete %cl
; AVX2-NEXT: andb %r8b, %cl
; AVX2-NEXT: movzbl %cl, %ecx
; AVX2-NEXT: negl %ecx
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; AVX2-NEXT: seto %al
; AVX2-NEXT: movzbl %al, %r9d
; AVX2-NEXT: negl %r9d
; AVX2-NEXT: movzbl %r8b, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, 16(%r11)
; AVX2-NEXT: movq %rdi, (%r11)
; AVX2-NEXT: movq %rbp, 24(%r11)
; AVX2-NEXT: movq %rsi, 8(%r11)
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, 16(%r10)
; AVX2-NEXT: movq %rdi, (%r10)
; AVX2-NEXT: movq %rcx, 24(%r10)
; AVX2-NEXT: movq %rsi, 8(%r10)
; AVX2-NEXT: retq
;
; AVX512-LABEL: saddo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: movq %rcx, %r14
; AVX512-NEXT: adcq %r11, %r14
; AVX512-NEXT: setns %bl
; AVX512-NEXT: testq %rcx, %rcx
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %bl
; AVX512-NEXT: testq %r11, %r11
; AVX512-NEXT: setns %al
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: sete %al
; AVX512-NEXT: andb %bl, %al
; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: seto %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: sete %al
; AVX512-NEXT: addq %r8, %rdi
; AVX512-NEXT: adcq %r9, %rsi
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: andb %al, %cl
; AVX512-NEXT: andl $1, %ecx
; AVX512-NEXT: kmovw %ecx, %k1
; AVX512-NEXT: seto %al
; AVX512-NEXT: andl $1, %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
; AVX512-NEXT: movq %r14, 24(%r10)
; AVX512-NEXT: movq %rcx, 24(%r10)
; AVX512-NEXT: movq %rsi, 8(%r10)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0

View File

@@ -1154,275 +1154,131 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind {
; SSE2-LABEL: ssubo_v2i128:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbp
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10
; SSE2-NEXT: testq %r9, %r9
; SSE2-NEXT: setns %al
; SSE2-NEXT: testq %rsi, %rsi
; SSE2-NEXT: setns %bl
; SSE2-NEXT: cmpb %al, %bl
; SSE2-NEXT: setne %bpl
; SSE2-NEXT: subq %r8, %rdi
; SSE2-NEXT: sbbq %r9, %rsi
; SSE2-NEXT: setns %al
; SSE2-NEXT: cmpb %al, %bl
; SSE2-NEXT: setne %al
; SSE2-NEXT: andb %bpl, %al
; SSE2-NEXT: seto %r8b
; SSE2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; SSE2-NEXT: movq %rcx, %rbp
; SSE2-NEXT: sbbq %r10, %rbp
; SSE2-NEXT: setns %bl
; SSE2-NEXT: testq %rcx, %rcx
; SSE2-NEXT: setns %cl
; SSE2-NEXT: cmpb %bl, %cl
; SSE2-NEXT: setne %r8b
; SSE2-NEXT: testq %r10, %r10
; SSE2-NEXT: setns %bl
; SSE2-NEXT: cmpb %bl, %cl
; SSE2-NEXT: setne %cl
; SSE2-NEXT: andb %r8b, %cl
; SSE2-NEXT: movzbl %cl, %ecx
; SSE2-NEXT: negl %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; SSE2-NEXT: seto %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movzbl %r8b, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movq %rdx, 16(%r11)
; SSE2-NEXT: movq %rdi, (%r11)
; SSE2-NEXT: movq %rbp, 24(%r11)
; SSE2-NEXT: movq %rsi, 8(%r11)
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %rbp
; SSE2-NEXT: movq %rdx, 16(%r10)
; SSE2-NEXT: movq %rdi, (%r10)
; SSE2-NEXT: movq %rcx, 24(%r10)
; SSE2-NEXT: movq %rsi, 8(%r10)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ssubo_v2i128:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pushq %rbp
; SSSE3-NEXT: pushq %rbx
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %r10
; SSSE3-NEXT: testq %r9, %r9
; SSSE3-NEXT: setns %al
; SSSE3-NEXT: testq %rsi, %rsi
; SSSE3-NEXT: setns %bl
; SSSE3-NEXT: cmpb %al, %bl
; SSSE3-NEXT: setne %bpl
; SSSE3-NEXT: subq %r8, %rdi
; SSSE3-NEXT: sbbq %r9, %rsi
; SSSE3-NEXT: setns %al
; SSSE3-NEXT: cmpb %al, %bl
; SSSE3-NEXT: setne %al
; SSSE3-NEXT: andb %bpl, %al
; SSSE3-NEXT: seto %r8b
; SSSE3-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; SSSE3-NEXT: movq %rcx, %rbp
; SSSE3-NEXT: sbbq %r10, %rbp
; SSSE3-NEXT: setns %bl
; SSSE3-NEXT: testq %rcx, %rcx
; SSSE3-NEXT: setns %cl
; SSSE3-NEXT: cmpb %bl, %cl
; SSSE3-NEXT: setne %r8b
; SSSE3-NEXT: testq %r10, %r10
; SSSE3-NEXT: setns %bl
; SSSE3-NEXT: cmpb %bl, %cl
; SSSE3-NEXT: setne %cl
; SSSE3-NEXT: andb %r8b, %cl
; SSSE3-NEXT: movzbl %cl, %ecx
; SSSE3-NEXT: negl %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; SSSE3-NEXT: seto %al
; SSSE3-NEXT: movzbl %al, %eax
; SSSE3-NEXT: negl %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movzbl %r8b, %eax
; SSSE3-NEXT: negl %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movq %rdx, 16(%r11)
; SSSE3-NEXT: movq %rdi, (%r11)
; SSSE3-NEXT: movq %rbp, 24(%r11)
; SSSE3-NEXT: movq %rsi, 8(%r11)
; SSSE3-NEXT: popq %rbx
; SSSE3-NEXT: popq %rbp
; SSSE3-NEXT: movq %rdx, 16(%r10)
; SSSE3-NEXT: movq %rdi, (%r10)
; SSSE3-NEXT: movq %rcx, 24(%r10)
; SSSE3-NEXT: movq %rsi, 8(%r10)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: ssubo_v2i128:
; SSE41: # %bb.0:
; SSE41-NEXT: pushq %rbp
; SSE41-NEXT: pushq %rbx
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r11
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %r10
; SSE41-NEXT: testq %r9, %r9
; SSE41-NEXT: setns %al
; SSE41-NEXT: testq %rsi, %rsi
; SSE41-NEXT: setns %bl
; SSE41-NEXT: cmpb %al, %bl
; SSE41-NEXT: setne %bpl
; SSE41-NEXT: subq %r8, %rdi
; SSE41-NEXT: sbbq %r9, %rsi
; SSE41-NEXT: setns %al
; SSE41-NEXT: cmpb %al, %bl
; SSE41-NEXT: setne %al
; SSE41-NEXT: andb %bpl, %al
; SSE41-NEXT: seto %r8b
; SSE41-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; SSE41-NEXT: movq %rcx, %rbp
; SSE41-NEXT: sbbq %r10, %rbp
; SSE41-NEXT: setns %bl
; SSE41-NEXT: testq %rcx, %rcx
; SSE41-NEXT: setns %cl
; SSE41-NEXT: cmpb %bl, %cl
; SSE41-NEXT: setne %r8b
; SSE41-NEXT: testq %r10, %r10
; SSE41-NEXT: setns %bl
; SSE41-NEXT: cmpb %bl, %cl
; SSE41-NEXT: setne %cl
; SSE41-NEXT: andb %r8b, %cl
; SSE41-NEXT: movzbl %cl, %ecx
; SSE41-NEXT: negl %ecx
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; SSE41-NEXT: seto %al
; SSE41-NEXT: movzbl %al, %r9d
; SSE41-NEXT: negl %r9d
; SSE41-NEXT: movzbl %r8b, %eax
; SSE41-NEXT: negl %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
; SSE41-NEXT: movq %rdx, 16(%r11)
; SSE41-NEXT: movq %rdi, (%r11)
; SSE41-NEXT: movq %rbp, 24(%r11)
; SSE41-NEXT: movq %rsi, 8(%r11)
; SSE41-NEXT: popq %rbx
; SSE41-NEXT: popq %rbp
; SSE41-NEXT: pinsrd $1, %r9d, %xmm0
; SSE41-NEXT: movq %rdx, 16(%r10)
; SSE41-NEXT: movq %rdi, (%r10)
; SSE41-NEXT: movq %rcx, 24(%r10)
; SSE41-NEXT: movq %rsi, 8(%r10)
; SSE41-NEXT: retq
;
; AVX1-LABEL: ssubo_v2i128:
; AVX1: # %bb.0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX1-NEXT: testq %r9, %r9
; AVX1-NEXT: setns %al
; AVX1-NEXT: testq %rsi, %rsi
; AVX1-NEXT: setns %bl
; AVX1-NEXT: cmpb %al, %bl
; AVX1-NEXT: setne %bpl
; AVX1-NEXT: subq %r8, %rdi
; AVX1-NEXT: sbbq %r9, %rsi
; AVX1-NEXT: setns %al
; AVX1-NEXT: cmpb %al, %bl
; AVX1-NEXT: setne %al
; AVX1-NEXT: andb %bpl, %al
; AVX1-NEXT: seto %r8b
; AVX1-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX1-NEXT: movq %rcx, %rbp
; AVX1-NEXT: sbbq %r10, %rbp
; AVX1-NEXT: setns %bl
; AVX1-NEXT: testq %rcx, %rcx
; AVX1-NEXT: setns %cl
; AVX1-NEXT: cmpb %bl, %cl
; AVX1-NEXT: setne %r8b
; AVX1-NEXT: testq %r10, %r10
; AVX1-NEXT: setns %bl
; AVX1-NEXT: cmpb %bl, %cl
; AVX1-NEXT: setne %cl
; AVX1-NEXT: andb %r8b, %cl
; AVX1-NEXT: movzbl %cl, %ecx
; AVX1-NEXT: negl %ecx
; AVX1-NEXT: movzbl %al, %eax
; AVX1-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; AVX1-NEXT: seto %al
; AVX1-NEXT: movzbl %al, %r9d
; AVX1-NEXT: negl %r9d
; AVX1-NEXT: movzbl %r8b, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, 16(%r11)
; AVX1-NEXT: movq %rdi, (%r11)
; AVX1-NEXT: movq %rbp, 24(%r11)
; AVX1-NEXT: movq %rsi, 8(%r11)
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, 16(%r10)
; AVX1-NEXT: movq %rdi, (%r10)
; AVX1-NEXT: movq %rcx, 24(%r10)
; AVX1-NEXT: movq %rsi, 8(%r10)
; AVX1-NEXT: retq
;
; AVX2-LABEL: ssubo_v2i128:
; AVX2: # %bb.0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX2-NEXT: testq %r9, %r9
; AVX2-NEXT: setns %al
; AVX2-NEXT: testq %rsi, %rsi
; AVX2-NEXT: setns %bl
; AVX2-NEXT: cmpb %al, %bl
; AVX2-NEXT: setne %bpl
; AVX2-NEXT: subq %r8, %rdi
; AVX2-NEXT: sbbq %r9, %rsi
; AVX2-NEXT: setns %al
; AVX2-NEXT: cmpb %al, %bl
; AVX2-NEXT: setne %al
; AVX2-NEXT: andb %bpl, %al
; AVX2-NEXT: seto %r8b
; AVX2-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX2-NEXT: movq %rcx, %rbp
; AVX2-NEXT: sbbq %r10, %rbp
; AVX2-NEXT: setns %bl
; AVX2-NEXT: testq %rcx, %rcx
; AVX2-NEXT: setns %cl
; AVX2-NEXT: cmpb %bl, %cl
; AVX2-NEXT: setne %r8b
; AVX2-NEXT: testq %r10, %r10
; AVX2-NEXT: setns %bl
; AVX2-NEXT: cmpb %bl, %cl
; AVX2-NEXT: setne %cl
; AVX2-NEXT: andb %r8b, %cl
; AVX2-NEXT: movzbl %cl, %ecx
; AVX2-NEXT: negl %ecx
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; AVX2-NEXT: seto %al
; AVX2-NEXT: movzbl %al, %r9d
; AVX2-NEXT: negl %r9d
; AVX2-NEXT: movzbl %r8b, %eax
; AVX2-NEXT: negl %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, 16(%r11)
; AVX2-NEXT: movq %rdi, (%r11)
; AVX2-NEXT: movq %rbp, 24(%r11)
; AVX2-NEXT: movq %rsi, 8(%r11)
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: vpinsrd $1, %r9d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, 16(%r10)
; AVX2-NEXT: movq %rdi, (%r10)
; AVX2-NEXT: movq %rcx, 24(%r10)
; AVX2-NEXT: movq %rsi, 8(%r10)
; AVX2-NEXT: retq
;
; AVX512-LABEL: ssubo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: movq %rcx, %r14
; AVX512-NEXT: sbbq %r11, %r14
; AVX512-NEXT: setns %bl
; AVX512-NEXT: testq %rcx, %rcx
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %bl
; AVX512-NEXT: testq %r11, %r11
; AVX512-NEXT: setns %al
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: andb %bl, %al
; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: seto %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: subq %r8, %rdi
; AVX512-NEXT: sbbq %r9, %rsi
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: andb %al, %cl
; AVX512-NEXT: andl $1, %ecx
; AVX512-NEXT: kmovw %ecx, %k1
; AVX512-NEXT: seto %al
; AVX512-NEXT: andl $1, %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
; AVX512-NEXT: movq %r14, 24(%r10)
; AVX512-NEXT: movq %rcx, 24(%r10)
; AVX512-NEXT: movq %rsi, 8(%r10)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0

View File

@@ -5,55 +5,35 @@
define zeroext i1 @saddoi128(i128 %v1, i128 %v2, i128* %res) nounwind {
; X64-LABEL: saddoi128:
; X64: ## %bb.0:
; X64-NEXT: testq %rcx, %rcx
; X64-NEXT: setns %r9b
; X64-NEXT: testq %rsi, %rsi
; X64-NEXT: setns %al
; X64-NEXT: cmpb %r9b, %al
; X64-NEXT: sete %r9b
; X64-NEXT: addq %rdx, %rdi
; X64-NEXT: adcq %rcx, %rsi
; X64-NEXT: setns %cl
; X64-NEXT: cmpb %cl, %al
; X64-NEXT: setne %al
; X64-NEXT: andb %r9b, %al
; X64-NEXT: seto %al
; X64-NEXT: movq %rdi, (%r8)
; X64-NEXT: movq %rsi, 8(%r8)
; X64-NEXT: retq
;
; X86-LABEL: saddoi128:
; X86: ## %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT: setns %al
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: setns %ah
; X86-NEXT: cmpb %al, %ah
; X86-NEXT: sete %cl
; X86-NEXT: addl {{[0-9]+}}(%esp), %esi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: addl {{[0-9]+}}(%esp), %edi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: setns %al
; X86-NEXT: cmpb %al, %ah
; X86-NEXT: setne %al
; X86-NEXT: andb %cl, %al
; X86-NEXT: movl %esi, (%ebp)
; X86-NEXT: movl %edi, 4(%ebp)
; X86-NEXT: movl %edx, 8(%ebp)
; X86-NEXT: movl %ebx, 12(%ebp)
; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: seto %al
; X86-NEXT: movl %edi, (%ecx)
; X86-NEXT: movl %ebx, 4(%ecx)
; X86-NEXT: movl %esi, 8(%ecx)
; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
%t = call {i128, i1} @llvm.sadd.with.overflow.i128(i128 %v1, i128 %v2)
%val = extractvalue {i128, i1} %t, 0
@@ -106,55 +86,35 @@ define zeroext i1 @uaddoi128(i128 %v1, i128 %v2, i128* %res) nounwind {
define zeroext i1 @ssuboi128(i128 %v1, i128 %v2, i128* %res) nounwind {
; X64-LABEL: ssuboi128:
; X64: ## %bb.0:
; X64-NEXT: testq %rcx, %rcx
; X64-NEXT: setns %r9b
; X64-NEXT: testq %rsi, %rsi
; X64-NEXT: setns %al
; X64-NEXT: cmpb %r9b, %al
; X64-NEXT: setne %r9b
; X64-NEXT: subq %rdx, %rdi
; X64-NEXT: sbbq %rcx, %rsi
; X64-NEXT: setns %cl
; X64-NEXT: cmpb %cl, %al
; X64-NEXT: setne %al
; X64-NEXT: andb %r9b, %al
; X64-NEXT: seto %al
; X64-NEXT: movq %rdi, (%r8)
; X64-NEXT: movq %rsi, 8(%r8)
; X64-NEXT: retq
;
; X86-LABEL: ssuboi128:
; X86: ## %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT: setns %al
; X86-NEXT: testl %ebx, %ebx
; X86-NEXT: setns %ah
; X86-NEXT: cmpb %al, %ah
; X86-NEXT: setne %cl
; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: setns %al
; X86-NEXT: cmpb %al, %ah
; X86-NEXT: setne %al
; X86-NEXT: andb %cl, %al
; X86-NEXT: movl %esi, (%ebp)
; X86-NEXT: movl %edi, 4(%ebp)
; X86-NEXT: movl %edx, 8(%ebp)
; X86-NEXT: movl %ebx, 12(%ebp)
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: seto %al
; X86-NEXT: movl %edi, (%ecx)
; X86-NEXT: movl %ebx, 4(%ecx)
; X86-NEXT: movl %esi, 8(%ecx)
; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
%t = call {i128, i1} @llvm.ssub.with.overflow.i128(i128 %v1, i128 %v2)
%val = extractvalue {i128, i1} %t, 0