[SelectionDAG] Combine U{ADD,SUB}O diamonds into {ADD,SUB}CARRY
Summary:
Convert (uaddo (uaddo x, y), carryIn) into addcarry x, y, carryIn if-and-only-if
the carry flags of the first two uaddo are merged via OR or XOR.

Work remaining: match ADD, etc.

Reviewers: craig.topper, RKSimon, spatel, niravd, jonpa, uweigand, deadalnix, nikic, lebedev.ri, dmgreen, chfast

Reviewed By: lebedev.ri

Subscribers: chfast, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70079
parent 52ebfb3921
commit 87c9c2362c
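For context, this pattern typically arises from open-coded multi-word arithmetic. The following C++ sketch (hypothetical, not part of this commit) is roughly the kind of source that lowers to the UADDO diamond described below, with the two partial carries of a limb merged via OR:

#include <cstdint>

// Add two 3-limb numbers. Each middle limb produces a partial carry from
// (a + b) and another from adding the incoming carry; the two are merged
// with '|', which is the diamond this combine recognizes.
void add3(uint64_t r[3], const uint64_t a[3], const uint64_t b[3]) {
  uint64_t s0 = a[0] + b[0];
  uint64_t c0 = s0 < a[0];      // carry out of limb 0
  uint64_t t1 = a[1] + b[1];    // (uaddo A, B)
  uint64_t cx = t1 < a[1];      // PartialCarryOutX
  uint64_t s1 = t1 + c0;        // (uaddo PartialSum, CarryIn)
  uint64_t cy = s1 < t1;        // PartialCarryOutY
  uint64_t c1 = cx | cy;        // CarryOut = (or *, *)
  r[0] = s0;
  r[1] = s1;
  r[2] = a[2] + b[2] + c1;
}
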
@@ -2802,6 +2802,96 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
  return SDValue();
}

// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)     \________
//       |  \                    \
//       |   \                    |
//       | PartialCarryOutY       |
//       |    \                   |
//       |     \                  /
//   AddCarrySum |         ______/
//                |       /
//       CarryOut = (or *, *)
//
// And generate ADDCARRY (or SUBCARRY) with two result values:
//
//    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
                                   const TargetLowering &TLI, SDValue Carry0,
                                   SDValue Carry1, SDNode *N) {
  if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
    return SDValue();
  unsigned Opcode = Carry0.getOpcode();
  if (Opcode != Carry1.getOpcode())
    return SDValue();
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return SDValue();

  // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
  // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
  // the above ASCII art.)
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    std::swap(Carry0, Carry1);
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    return SDValue();

  // The carry in value must be on the righthand side for subtraction.
  unsigned CarryInOperandNum =
      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
    return SDValue();
  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
    return SDValue();

  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
  // TODO: make getAsCarry() aware of how partial carries are merged.
  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();
  CarryIn = CarryIn.getOperand(0);
  if (CarryIn.getValueType() != MVT::i1)
    return SDValue();

  SDLoc DL(N);
  SDValue Merged =
      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
                  Carry0.getOperand(1), CarryIn);

  // Please note that because we have proven that the result of the UADDO/USUBO
  // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
  // therefore prove that if the first UADDO/USUBO overflows, the second
  // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
  // maximum value.
  //
  //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
  //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
  //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags; and that AND can return a constant zero.
  //
  // TODO: match other operations that can merge flags (ADD, etc)
  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
  if (N->getOpcode() == ISD::AND)
    return DAG.getConstant(0, DL, MVT::i1);
  return Merged.getValue(1);
}

SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
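The mutual-exclusivity argument in the comment above can be checked exhaustively for 8-bit values. A small standalone C++ sketch (hypothetical, for illustration only):

#include <cassert>
#include <cstdint>

int main() {
  // When the sum of the first addition feeds the addition of the carry-in,
  // at most one of the two additions can wrap, so OR and XOR agree on the
  // merged carry and the AND of the two carries is always false.
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      for (unsigned cin = 0; cin < 2; ++cin) {
        uint8_t partial = static_cast<uint8_t>(a + b);
        bool carry1 = partial < a;
        uint8_t sum = static_cast<uint8_t>(partial + cin);
        bool carry2 = sum < partial;
        assert(!(carry1 && carry2));
        assert((carry1 | carry2) == (carry1 ^ carry2));
      }
  return 0;
}
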
@@ -5093,6 +5183,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
  if (SDValue Shuffle = XformToShuffleWithZero(N))
    return Shuffle;

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -5787,6 +5880,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
@@ -7049,6 +7145,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  return SDValue();
}
@@ -511,40 +511,13 @@ define i32 @add_U320_without_i128_add(%struct.U320* nocapture dereferenceable(40
define i32 @add_U320_without_i128_or(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
; CHECK-LABEL: add_U320_without_i128_or:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: addq 8(%rdi), %rdx
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: addq %rsi, (%rdi)
; CHECK-NEXT: adcq $0, %rdx
; CHECK-NEXT: adcq %rdx, 8(%rdi)
; CHECK-NEXT: adcq %rcx, 16(%rdi)
; CHECK-NEXT: adcq %r8, 24(%rdi)
; CHECK-NEXT: adcq %r9, 32(%rdi)
; CHECK-NEXT: setb %al
; CHECK-NEXT: addq 16(%rdi), %rcx
; CHECK-NEXT: setb %r11b
; CHECK-NEXT: orb %r10b, %al
; CHECK-NEXT: movzbl %al, %ebx
; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: setb %cl
; CHECK-NEXT: addq 24(%rdi), %r8
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: orb %r11b, %cl
; CHECK-NEXT: movzbl %cl, %esi
; CHECK-NEXT: addq %r8, %rsi
; CHECK-NEXT: setb %al
; CHECK-NEXT: addq 32(%rdi), %r9
; CHECK-NEXT: setb %r8b
; CHECK-NEXT: orb %r10b, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: setb %cl
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rbx, 16(%rdi)
; CHECK-NEXT: movq %rsi, 24(%rdi)
; CHECK-NEXT: movq %rax, 32(%rdi)
; CHECK-NEXT: orb %r8b, %cl
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
  %8 = load i64, i64* %7, align 8
@@ -594,40 +567,13 @@ define i32 @add_U320_without_i128_or(%struct.U320* nocapture dereferenceable(40)
define i32 @add_U320_without_i128_xor(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
; CHECK-LABEL: add_U320_without_i128_xor:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: addq 8(%rdi), %rdx
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: addq %rsi, (%rdi)
; CHECK-NEXT: adcq $0, %rdx
; CHECK-NEXT: adcq %rdx, 8(%rdi)
; CHECK-NEXT: adcq %rcx, 16(%rdi)
; CHECK-NEXT: adcq %r8, 24(%rdi)
; CHECK-NEXT: adcq %r9, 32(%rdi)
; CHECK-NEXT: setb %al
; CHECK-NEXT: addq 16(%rdi), %rcx
; CHECK-NEXT: setb %r11b
; CHECK-NEXT: xorb %r10b, %al
; CHECK-NEXT: movzbl %al, %ebx
; CHECK-NEXT: addq %rcx, %rbx
; CHECK-NEXT: setb %cl
; CHECK-NEXT: addq 24(%rdi), %r8
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: xorb %r11b, %cl
; CHECK-NEXT: movzbl %cl, %esi
; CHECK-NEXT: addq %r8, %rsi
; CHECK-NEXT: setb %al
; CHECK-NEXT: addq 32(%rdi), %r9
; CHECK-NEXT: setb %r8b
; CHECK-NEXT: xorb %r10b, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: setb %cl
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rbx, 16(%rdi)
; CHECK-NEXT: movq %rsi, 24(%rdi)
; CHECK-NEXT: movq %rax, 32(%rdi)
; CHECK-NEXT: xorb %r8b, %cl
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
  %8 = load i64, i64* %7, align 8
@@ -674,34 +620,71 @@ define i32 @add_U320_without_i128_xor(%struct.U320* nocapture dereferenceable(40)
  ret i32 %43
}

; Either the primary addition can overflow or the addition of the carry, but
; they cannot both overflow.
define i32 @bogus_add_U320_without_i128_and(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
; CHECK-LABEL: bogus_add_U320_without_i128_and:
; CHECK: # %bb.0:
; CHECK-NEXT: addq %rsi, (%rdi)
; CHECK-NEXT: adcq %rdx, 8(%rdi)
; CHECK-NEXT: addq %rcx, 16(%rdi)
; CHECK-NEXT: addq %r8, 24(%rdi)
; CHECK-NEXT: addq %r9, 32(%rdi)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
  %7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
  %8 = load i64, i64* %7, align 8
  %9 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 1
  %10 = load i64, i64* %9, align 8
  %11 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 2
  %12 = load i64, i64* %11, align 8
  %13 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 3
  %14 = load i64, i64* %13, align 8
  %15 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 4
  %16 = load i64, i64* %15, align 8
  %17 = add i64 %8, %1
  %18 = add i64 %10, %2
  %19 = icmp ult i64 %17, %1
  %20 = zext i1 %19 to i64
  %21 = add i64 %18, %20
  %22 = add i64 %12, %3
  %23 = icmp ult i64 %18, %10
  %24 = icmp ult i64 %21, %18
  %25 = and i1 %23, %24
  %26 = zext i1 %25 to i64
  %27 = add i64 %22, %26
  %28 = add i64 %14, %4
  %29 = icmp ult i64 %22, %12
  %30 = icmp ult i64 %27, %22
  %31 = and i1 %29, %30
  %32 = zext i1 %31 to i64
  %33 = add i64 %28, %32
  %34 = add i64 %16, %5
  %35 = icmp ult i64 %28, %14
  %36 = icmp ult i64 %33, %28
  %37 = and i1 %35, %36
  %38 = zext i1 %37 to i64
  %39 = add i64 %34, %38
  store i64 %17, i64* %7, align 8
  store i64 %21, i64* %9, align 8
  store i64 %27, i64* %11, align 8
  store i64 %33, i64* %13, align 8
  store i64 %39, i64* %15, align 8
  %40 = icmp ult i64 %34, %16
  %41 = icmp ult i64 %39, %34
  %42 = and i1 %40, %41
  %43 = zext i1 %42 to i32
  ret i32 %43
}

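The same exclusivity argument is what makes the AND case above fold to a constant: written at the source level (a hypothetical C++ sketch mirroring the IR above, not taken from the test file), the AND-merged flag can never be set, so the final carry-out is provably zero; this is why the CHECK lines show a plain xorl %eax, %eax instead of a carry chain.

#include <cstdint>

// Two-limb step with the partial carries merged by '&' instead of '|'.
// The two partial additions can never both wrap, so 'merged' is always 0.
uint32_t and_merged_carry(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) {
  uint64_t s0 = a0 + b0;
  uint64_t c0 = s0 < a0;
  uint64_t t1 = a1 + b1;
  uint64_t s1 = t1 + c0;
  bool merged = (t1 < a1) & (s1 < t1); // always false
  return merged;                       // always 0
}
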
define void @add_U320_without_i128_or_no_ret(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
; CHECK-LABEL: add_U320_without_i128_or_no_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: addq 8(%rdi), %rdx
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: addq %rsi, (%rdi)
; CHECK-NEXT: adcq $0, %rdx
; CHECK-NEXT: setb %al
; CHECK-NEXT: addq 16(%rdi), %rcx
; CHECK-NEXT: setb %r11b
; CHECK-NEXT: orb %r10b, %al
; CHECK-NEXT: movzbl %al, %esi
; CHECK-NEXT: addq %rcx, %rsi
; CHECK-NEXT: setb %cl
; CHECK-NEXT: addq 24(%rdi), %r8
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: orb %r11b, %cl
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: addq %r8, %rcx
; CHECK-NEXT: setb %al
; CHECK-NEXT: addq 32(%rdi), %r9
; CHECK-NEXT: orb %r10b, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rsi, 16(%rdi)
; CHECK-NEXT: movq %rcx, 24(%rdi)
; CHECK-NEXT: movq %rax, 32(%rdi)
; CHECK-NEXT: adcq %rdx, 8(%rdi)
; CHECK-NEXT: adcq %rcx, 16(%rdi)
; CHECK-NEXT: adcq %r8, 24(%rdi)
; CHECK-NEXT: adcq %r9, 32(%rdi)
; CHECK-NEXT: retq
  %7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
  %8 = load i64, i64* %7, align 8
@@ -747,34 +730,12 @@ define void @add_U320_without_i128_or_no_ret(%struct.U320* nocapture dereference
define i32 @add_U320_uaddo(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
; CHECK-LABEL: add_U320_uaddo:
; CHECK: # %bb.0:
; CHECK-NEXT: addq 8(%rdi), %rdx
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: addq %rsi, (%rdi)
; CHECK-NEXT: adcq $0, %rdx
; CHECK-NEXT: adcq %rdx, 8(%rdi)
; CHECK-NEXT: adcq %rcx, 16(%rdi)
; CHECK-NEXT: adcq %r8, 24(%rdi)
; CHECK-NEXT: adcq %r9, 32(%rdi)
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %r10b, %al
; CHECK-NEXT: movzbl %al, %esi
; CHECK-NEXT: addq 16(%rdi), %rcx
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: addq %rsi, %rcx
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %r10b, %al
; CHECK-NEXT: movzbl %al, %esi
; CHECK-NEXT: addq 24(%rdi), %r8
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: addq %rsi, %r8
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %r10b, %al
; CHECK-NEXT: movzbl %al, %esi
; CHECK-NEXT: addq 32(%rdi), %r9
; CHECK-NEXT: setb %r10b
; CHECK-NEXT: addq %rsi, %r9
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %r10b, %al
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: movq %r8, 24(%rdi)
; CHECK-NEXT: movq %r9, 32(%rdi)
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retq
  %7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
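The add_U320_uaddo variant above builds the same chain from overflow intrinsics rather than from compares. A hedged C++ sketch (assuming a compiler that lowers __builtin_add_overflow to llvm.uadd.with.overflow; not taken from the test file) of one limb of that form:

#include <cstdint>

// One limb of a carry chain written with overflow builtins; the two carry
// flags are merged with '|' exactly as in the compare-based form, so the
// same UADDO diamond reaches the DAG combiner.
uint64_t add_limb(uint64_t a, uint64_t b, uint64_t carry_in, bool *carry_out) {
  uint64_t t, s;
  bool c1 = __builtin_add_overflow(a, b, &t);
  bool c2 = __builtin_add_overflow(t, carry_in, &s);
  *carry_out = c1 | c2;
  return s;
}
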
@@ -838,22 +799,14 @@ define void @PR39464(%struct.U192* noalias nocapture sret %0, %struct.U192* noca
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq (%rsi), %rcx
; CHECK-NEXT: movq (%rdx), %r8
; CHECK-NEXT: leaq (%rcx,%r8), %rdi
; CHECK-NEXT: movq %rdi, (%rax)
; CHECK-NEXT: movq 8(%rsi), %rdi
; CHECK-NEXT: addq 8(%rdx), %rdi
; CHECK-NEXT: setb %r9b
; CHECK-NEXT: addq %r8, %rcx
; CHECK-NEXT: adcq $0, %rdi
; CHECK-NEXT: setb %cl
; CHECK-NEXT: orb %r9b, %cl
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: movq %rdi, 8(%rax)
; CHECK-NEXT: movq 16(%rsi), %rsi
; CHECK-NEXT: addq 16(%rdx), %rsi
; CHECK-NEXT: addq %rcx, %rsi
; CHECK-NEXT: movq %rsi, 16(%rax)
; CHECK-NEXT: addq (%rdx), %rcx
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: movq 8(%rsi), %rcx
; CHECK-NEXT: adcq 8(%rdx), %rcx
; CHECK-NEXT: movq %rcx, 8(%rdi)
; CHECK-NEXT: movq 16(%rsi), %rcx
; CHECK-NEXT: adcq 16(%rdx), %rcx
; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: retq
  %4 = getelementptr inbounds %struct.U192, %struct.U192* %1, i64 0, i32 0, i64 0
  %5 = load i64, i64* %4, align 8
@@ -896,12 +849,9 @@ define void @PR39464(%struct.U192* noalias nocapture sret %0, %struct.U192* noca
define zeroext i1 @uaddo_U128_without_i128_or(i64 %0, i64 %1, i64 %2, i64 %3, %uint128* nocapture %4) nounwind {
; CHECK-LABEL: uaddo_U128_without_i128_or:
; CHECK: # %bb.0:
; CHECK-NEXT: addq %rcx, %rsi
; CHECK-NEXT: setb %cl
; CHECK-NEXT: addq %rdx, %rdi
; CHECK-NEXT: adcq $0, %rsi
; CHECK-NEXT: adcq %rcx, %rsi
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: movq %rsi, (%r8)
; CHECK-NEXT: movq %rdi, 8(%r8)
; CHECK-NEXT: retq
@@ -927,18 +877,12 @@ define void @add_U192_without_i128_or(%uint192* sret %0, i64 %1, i64 %2, i64 %3,
; CHECK-LABEL: add_U192_without_i128_or:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: addq %r9, %rdx
; CHECK-NEXT: setb %dil
; CHECK-NEXT: addq %r8, %rsi
; CHECK-NEXT: adcq $0, %rdx
; CHECK-NEXT: setb %r8b
; CHECK-NEXT: orb %dil, %r8b
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movzbl %r8b, %edi
; CHECK-NEXT: addq %rcx, %rdi
; CHECK-NEXT: movq %rdi, (%rax)
; CHECK-NEXT: movq %rdx, 8(%rax)
; CHECK-NEXT: movq %rsi, 16(%rax)
; CHECK-NEXT: adcq %r9, %rdx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: movq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rsi, 16(%rdi)
; CHECK-NEXT: retq
  %8 = add i64 %4, %1
  %9 = icmp ult i64 %8, %1
@@ -969,29 +913,18 @@ define void @add_U256_without_i128_or_by_i64_words(%uint256* sret %0, %uint256*
; CHECK-LABEL: add_U256_without_i128_or_by_i64_words:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq (%rdx), %r9
; CHECK-NEXT: movq 8(%rdx), %r10
; CHECK-NEXT: addq 8(%rsi), %r10
; CHECK-NEXT: setb %r8b
; CHECK-NEXT: addq (%rsi), %r9
; CHECK-NEXT: adcq $0, %r10
; CHECK-NEXT: setb %cl
; CHECK-NEXT: orb %r8b, %cl
; CHECK-NEXT: movq 16(%rdx), %rdi
; CHECK-NEXT: addq 16(%rsi), %rdi
; CHECK-NEXT: setb %r8b
; CHECK-NEXT: movzbl %cl, %r11d
; CHECK-NEXT: addq %rdi, %r11
; CHECK-NEXT: setb %cl
; CHECK-NEXT: orb %r8b, %cl
; CHECK-NEXT: movq (%rdx), %r8
; CHECK-NEXT: movq 8(%rdx), %rdi
; CHECK-NEXT: addq (%rsi), %r8
; CHECK-NEXT: adcq 8(%rsi), %rdi
; CHECK-NEXT: movq 16(%rdx), %rcx
; CHECK-NEXT: adcq 16(%rsi), %rcx
; CHECK-NEXT: movq 24(%rdx), %rdx
; CHECK-NEXT: addq 24(%rsi), %rdx
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: movq %rcx, (%rax)
; CHECK-NEXT: movq %r11, 8(%rax)
; CHECK-NEXT: movq %r10, 16(%rax)
; CHECK-NEXT: movq %r9, 24(%rax)
; CHECK-NEXT: adcq 24(%rsi), %rdx
; CHECK-NEXT: movq %rdx, (%rax)
; CHECK-NEXT: movq %rcx, 8(%rax)
; CHECK-NEXT: movq %rdi, 16(%rax)
; CHECK-NEXT: movq %r8, 24(%rax)
; CHECK-NEXT: retq
  %4 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 0, i32 0
  %5 = load i64, i64* %4, align 8
@@ -1043,24 +976,15 @@ define void @add_U256_without_i128_or_recursive(%uint256* sret %0, %uint256* %1,
; CHECK-LABEL: add_U256_without_i128_or_recursive:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq (%rdx), %r9
; CHECK-NEXT: movq (%rdx), %r8
; CHECK-NEXT: movq 8(%rdx), %rdi
; CHECK-NEXT: addq 8(%rsi), %rdi
; CHECK-NEXT: setb %r8b
; CHECK-NEXT: addq (%rsi), %r9
; CHECK-NEXT: adcq $0, %rdi
; CHECK-NEXT: setb %cl
; CHECK-NEXT: orb %r8b, %cl
; CHECK-NEXT: movq 16(%rdx), %r8
; CHECK-NEXT: movq 24(%rdx), %r10
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: addq 16(%rsi), %r8
; CHECK-NEXT: setb %dl
; CHECK-NEXT: addq 24(%rsi), %r10
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: addq %r8, %rcx
; CHECK-NEXT: adcq %r10, %rdx
; CHECK-NEXT: movq %r9, (%rax)
; CHECK-NEXT: addq (%rsi), %r8
; CHECK-NEXT: adcq 8(%rsi), %rdi
; CHECK-NEXT: movq 16(%rdx), %rcx
; CHECK-NEXT: movq 24(%rdx), %rdx
; CHECK-NEXT: adcq 16(%rsi), %rcx
; CHECK-NEXT: adcq 24(%rsi), %rdx
; CHECK-NEXT: movq %r8, (%rax)
; CHECK-NEXT: movq %rdi, 8(%rax)
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rdx, 24(%rax)

@@ -192,51 +192,13 @@ define i64 @sub_from_carry(i64 %x, i64 %y, i64* %valout, i64 %z) {
define i32 @sub_U320_without_i128_or(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
; CHECK-LABEL: sub_U320_without_i128_or:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: .cfi_offset %rbx, -24
; CHECK-NEXT: .cfi_offset %r14, -16
; CHECK-NEXT: movq 8(%rdi), %r14
; CHECK-NEXT: movq 16(%rdi), %r10
; CHECK-NEXT: movq 24(%rdi), %r11
; CHECK-NEXT: movq 32(%rdi), %rbx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: subq %rsi, (%rdi)
; CHECK-NEXT: sbbq %rdx, 8(%rdi)
; CHECK-NEXT: sbbq %rcx, 16(%rdi)
; CHECK-NEXT: sbbq %r8, 24(%rdi)
; CHECK-NEXT: sbbq %r9, 32(%rdi)
; CHECK-NEXT: setb %al
; CHECK-NEXT: subq %rdx, %r14
; CHECK-NEXT: setb %dl
; CHECK-NEXT: subq %rax, %r14
; CHECK-NEXT: setb %al
; CHECK-NEXT: subq %rcx, %r10
; CHECK-NEXT: setb %cl
; CHECK-NEXT: orb %dl, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: subq %rax, %r10
; CHECK-NEXT: setb %al
; CHECK-NEXT: subq %r8, %r11
; CHECK-NEXT: setb %dl
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: subq %rax, %r11
; CHECK-NEXT: setb %al
; CHECK-NEXT: subq %r9, %rbx
; CHECK-NEXT: setb %cl
; CHECK-NEXT: orb %dl, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: subq %rax, %rbx
; CHECK-NEXT: setb %al
; CHECK-NEXT: movq %r14, 8(%rdi)
; CHECK-NEXT: movq %r10, 16(%rdi)
; CHECK-NEXT: movq %r11, 24(%rdi)
; CHECK-NEXT: movq %rbx, 32(%rdi)
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
  %8 = load i64, i64* %7, align 8
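The subtraction tests exercise the USUBO side of the combine. A borrow-propagating counterpart of the earlier addition sketch (hypothetical C++, not part of the test file); merging the two partial borrows with OR forms the USUBO diamond that becomes a SUBCARRY chain:

#include <cstdint>

// Subtract two 3-limb numbers, propagating the borrow by hand.
void sub3(uint64_t r[3], const uint64_t a[3], const uint64_t b[3]) {
  uint64_t d0 = a[0] - b[0];
  uint64_t brw0 = a[0] < b[0];   // borrow out of limb 0
  uint64_t t1 = a[1] - b[1];     // (usubo A, B)
  uint64_t bx = a[1] < b[1];     // partial borrow X
  uint64_t d1 = t1 - brw0;       // (usubo partial, BorrowIn)
  uint64_t by = t1 < brw0;       // partial borrow Y
  uint64_t brw1 = bx | by;       // merged borrow out
  r[0] = d0;
  r[1] = d1;
  r[2] = a[2] - b[2] - brw1;
}
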
@@ -286,51 +248,13 @@ define i32 @sub_U320_without_i128_or(%struct.U320* nocapture dereferenceable(40)
define i32 @sub_U320_usubo(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
; CHECK-LABEL: sub_U320_usubo:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: .cfi_offset %rbx, -24
; CHECK-NEXT: .cfi_offset %r14, -16
; CHECK-NEXT: movq 8(%rdi), %r14
; CHECK-NEXT: movq 16(%rdi), %r10
; CHECK-NEXT: movq 24(%rdi), %r11
; CHECK-NEXT: movq 32(%rdi), %rbx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: subq %rsi, (%rdi)
; CHECK-NEXT: sbbq %rdx, 8(%rdi)
; CHECK-NEXT: sbbq %rcx, 16(%rdi)
; CHECK-NEXT: sbbq %r8, 24(%rdi)
; CHECK-NEXT: sbbq %r9, 32(%rdi)
; CHECK-NEXT: setb %al
; CHECK-NEXT: subq %rdx, %r14
; CHECK-NEXT: setb %dl
; CHECK-NEXT: subq %rax, %r14
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %dl, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: subq %rcx, %r10
; CHECK-NEXT: setb %cl
; CHECK-NEXT: subq %rax, %r10
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: subq %r8, %r11
; CHECK-NEXT: setb %cl
; CHECK-NEXT: subq %rax, %r11
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: subq %r9, %rbx
; CHECK-NEXT: setb %cl
; CHECK-NEXT: subq %rax, %rbx
; CHECK-NEXT: setb %al
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: movq %r14, 8(%rdi)
; CHECK-NEXT: movq %r10, 16(%rdi)
; CHECK-NEXT: movq %r11, 24(%rdi)
; CHECK-NEXT: movq %rbx, 32(%rdi)
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %7 = getelementptr inbounds %struct.U320, %struct.U320* %0, i64 0, i32 0, i64 0
  %8 = load i64, i64* %7, align 8
@@ -393,22 +317,14 @@ define void @PR39464(%struct.U192* noalias nocapture sret %0, %struct.U192* noca
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq (%rsi), %rcx
; CHECK-NEXT: xorl %r9d, %r9d
; CHECK-NEXT: subq (%rdx), %rcx
; CHECK-NEXT: setb %r9b
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: movq 8(%rsi), %rdi
; CHECK-NEXT: subq 8(%rdx), %rdi
; CHECK-NEXT: setb %r8b
; CHECK-NEXT: subq %r9, %rdi
; CHECK-NEXT: setb %cl
; CHECK-NEXT: orb %r8b, %cl
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: movq %rdi, 8(%rax)
; CHECK-NEXT: movq 16(%rsi), %rsi
; CHECK-NEXT: subq 16(%rdx), %rsi
; CHECK-NEXT: subq %rcx, %rsi
; CHECK-NEXT: movq %rsi, 16(%rax)
; CHECK-NEXT: movq 8(%rsi), %rcx
; CHECK-NEXT: sbbq 8(%rdx), %rcx
; CHECK-NEXT: movq %rcx, 8(%rdi)
; CHECK-NEXT: movq 16(%rsi), %rcx
; CHECK-NEXT: sbbq 16(%rdx), %rcx
; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: retq
  %4 = getelementptr inbounds %struct.U192, %struct.U192* %1, i64 0, i32 0, i64 0
  %5 = load i64, i64* %4, align 8
@@ -454,28 +370,23 @@ define void @sub_U256_without_i128_or_recursive(%uint256* sret %0, %uint256* %1,
; CHECK-LABEL: sub_U256_without_i128_or_recursive:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq (%rsi), %r8
; CHECK-NEXT: movq (%rsi), %r9
; CHECK-NEXT: movq 8(%rsi), %r10
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: subq (%rdx), %r8
; CHECK-NEXT: setb %cl
; CHECK-NEXT: subq 8(%rdx), %r10
; CHECK-NEXT: setb %r9b
; CHECK-NEXT: subq %rcx, %r10
; CHECK-NEXT: setb %cl
; CHECK-NEXT: orb %r9b, %cl
; CHECK-NEXT: movq 16(%rsi), %rdi
; CHECK-NEXT: subq (%rdx), %r9
; CHECK-NEXT: sbbq 8(%rdx), %r10
; CHECK-NEXT: setb %r8b
; CHECK-NEXT: movq 16(%rsi), %rcx
; CHECK-NEXT: movq 24(%rsi), %rsi
; CHECK-NEXT: xorl %r9d, %r9d
; CHECK-NEXT: subq 16(%rdx), %rdi
; CHECK-NEXT: setb %r9b
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: subq 16(%rdx), %rcx
; CHECK-NEXT: setb %dil
; CHECK-NEXT: subq 24(%rdx), %rsi
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: subq %rcx, %rdi
; CHECK-NEXT: sbbq %r9, %rsi
; CHECK-NEXT: movq %r8, (%rax)
; CHECK-NEXT: movzbl %r8b, %edx
; CHECK-NEXT: subq %rdx, %rcx
; CHECK-NEXT: sbbq %rdi, %rsi
; CHECK-NEXT: movq %r9, (%rax)
; CHECK-NEXT: movq %r10, 8(%rax)
; CHECK-NEXT: movq %rdi, 16(%rax)
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rsi, 24(%rax)
; CHECK-NEXT: retq
  %4 = getelementptr inbounds %uint256, %uint256* %1, i64 0, i32 0, i32 0