From 5129faa331eee91e2f607d67fccc08469cedb904 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 12 Sep 2017 16:24:17 +0000 Subject: [PATCH] Revert r313009 "[ARM] Use ADDCARRY / SUBCARRY" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was causing PR34045 to fire again. > This is a preparatory step for D34515 and also is being recommitted as its > first version caused PR34045. > > This change: > - makes nodes ISD::ADDCARRY and ISD::SUBCARRY legal for i32 > - lowering is done by first converting the boolean value into the carry flag > using (_, C) ← (ARMISD::ADDC R, -1) and converted back to an integer value > using (R, _) ← (ARMISD::ADDE 0, 0, C). An ARMISD::ADDE between the two > operations does the actual addition. > - for subtraction, given that ISD::SUBCARRY second result is actually a > borrow, we need to invert the value of the second operand and result before > and after using ARMISD::SUBE. We need to invert the carry result of > ARMISD::SUBE to preserve the semantics. > - given that the generic combiner may lower ISD::ADDCARRY and > ISD::SUBCARRY into ISD::UADDO and ISD::USUBO we need to update their lowering > as well otherwise i64 operations now would require branches. This implies > updating the corresponding test for unsigned. > - add new combiner to remove the redundant conversions from/to carry flags > to/from boolean values (ARMISD::ADDC (ARMISD::ADDE 0, 0, C), -1) → C > - fixes PR34045 > > Differential Revision: https://reviews.llvm.org/D35192 Also revert follow-up r313010: > [ARM] Fix typo when creating ISD::SUB nodes > > In D35192, I accidentally introduced a typo when creating ISD::SUB nodes, > giving them two values instead of one. > > This fails when the merge_values combiner finds one of these nodes. > > This change fixes PR34564. 
> > Differential Revision: https://reviews.llvm.org/D37690 llvm-svn: 313044 --- lib/Target/ARM/ARMISelLowering.cpp | 185 +++--------------------- lib/Target/ARM/ARMISelLowering.h | 3 +- test/CodeGen/ARM/intrinsics-overflow.ll | 88 ++--------- test/CodeGen/ARM/pr34045.ll | 53 ------- 4 files changed, 35 insertions(+), 294 deletions(-) delete mode 100644 test/CodeGen/ARM/pr34045.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0bd7c71fe4b..e947ddc7031 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -802,9 +802,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); - setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); - // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -3956,7 +3953,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, } SDValue -ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. 
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); @@ -3978,66 +3975,6 @@ ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } -static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, - SelectionDAG &DAG) { - SDLoc DL(BoolCarry); - EVT CarryVT = BoolCarry.getValueType(); - - APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); - // This converts the boolean value carry into the carry flag by doing - // ARMISD::ADDC Carry, ~0 - return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32), - BoolCarry, DAG.getConstant(NegOne, DL, CarryVT)); -} - -static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, - SelectionDAG &DAG) { - SDLoc DL(Flags); - - // Now convert the carry flag into a boolean carry. We do this - // using ARMISD:ADDE 0, 0, Carry - return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), Flags); -} - -SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, - SelectionDAG &DAG) const { - // Let legalize expand this if it isn't a legal type yet. - if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) - return SDValue(); - - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDLoc dl(Op); - - EVT VT = Op.getValueType(); - SDVTList VTs = DAG.getVTList(VT, MVT::i32); - SDValue Value; - SDValue Overflow; - switch (Op.getOpcode()) { - default: - llvm_unreachable("Unknown overflow instruction!"); - case ISD::UADDO: - Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS); - // Convert the carry flag into a boolean value. - Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); - break; - case ISD::USUBO: { - Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS); - // Convert the carry flag into a boolean value. 
- Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); - // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow - // value. So compute 1 - C. - Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant(1, dl, MVT::i32), Overflow); - break; - } - } - - return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); -} - SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -7443,53 +7380,6 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } -static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { - SDNode *N = Op.getNode(); - EVT VT = N->getValueType(0); - SDVTList VTs = DAG.getVTList(VT, MVT::i32); - - SDValue Carry = Op.getOperand(2); - EVT CarryVT = Carry.getValueType(); - - SDLoc DL(Op); - - APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); - - SDValue Result; - if (Op.getOpcode() == ISD::ADDCARRY) { - // This converts the boolean value carry into the carry flag. - Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); - - // Do the addition proper using the carry flag we wanted. - Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0), - Op.getOperand(1), Carry.getValue(1)); - - // Now convert the carry flag into a boolean value. - Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); - } else { - // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we - // have to invert the carry first. - Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, - DAG.getConstant(1, DL, MVT::i32), Carry); - // This converts the boolean value carry into the carry flag. - Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); - - // Do the subtraction proper using the carry flag we wanted. 
- Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0), - Op.getOperand(1), Carry.getValue(1)); - - // Now convert the carry flag into a boolean value. - Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); - // But the carry returned by ARMISD::SUBE is not a borrow as expected - // by ISD::SUBCARRY, so compute 1 - C. - Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, - DAG.getConstant(1, DL, MVT::i32), Carry); - } - - // Return both values. - return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry); -} - SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin()); @@ -7844,14 +7734,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); - case ISD::ADDCARRY: - case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::SADDO: - case ISD::SSUBO: - return LowerSignedALUO(Op, DAG); case ISD::UADDO: + case ISD::SSUBO: case ISD::USUBO: - return LowerUnsignedALUO(Op, DAG); + return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -9800,11 +9687,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, // a S/UMLAL instruction. // UMUL_LOHI // / :lo \ :hi - // V \ [no multiline comment] - // loAdd -> ADDC | - // \ :carry / - // V V - // ADDE <- hiAdd + // / \ [no multiline comment] + // loAdd -> ADDE | + // \ :glue / + // \ / + // ADDC <- hiAdd // assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE"); @@ -9812,7 +9699,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, AddeNode->getOperand(2).getValueType() == MVT::i32 && "ADDE node has the wrong inputs"); - // Check that we are chained to the right ADDC node. + // Check that we have a glued ADDC node. 
SDNode* AddcNode = AddeNode->getOperand(2).getNode(); if (AddcNode->getOpcode() != ARMISD::ADDC) return SDValue(); @@ -9863,7 +9750,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, SDValue* LoMul = nullptr; SDValue* LowAdd = nullptr; - // Ensure that ADDE is from high result of ISD::xMUL_LOHI. + // Ensure that ADDE is from high result of ISD::SMUL_LOHI. if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1))) return SDValue(); @@ -9888,11 +9775,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, if (!LoMul) return SDValue(); - // If HiAdd is a predecessor of ADDC, the replacement below will create a - // cycle. - if (AddcNode->isPredecessorOf(HiAdd->getNode())) - return SDValue(); - // Create the merged node. SelectionDAG &DAG = DCI.DAG; @@ -9995,22 +9877,8 @@ static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformAddcSubcCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, +static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { - SelectionDAG &DAG(DCI.DAG); - - if (N->getOpcode() == ARMISD::ADDC) { - // (ADDC (ADDE 0, 0, C), -1) -> C - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if (LHS->getOpcode() == ARMISD::ADDE && - isNullConstant(LHS->getOperand(0)) && - isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) { - return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2)); - } - } - if (Subtarget->isThumb1Only()) { SDValue RHS = N->getOperand(1); if (ConstantSDNode *C = dyn_cast(RHS)) { @@ -11899,14 +11767,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static const APInt *isPowerOf2Constant(SDValue V) { - ConstantSDNode *C = dyn_cast(V); - if (!C) - return nullptr; - const APInt *CV = &C->getAPIntValue(); - return CV->isPowerOf2() ? 
CV : nullptr; -} - SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11935,8 +11795,8 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue And = CmpZ->getOperand(0); if (And->getOpcode() != ISD::AND) return SDValue(); - const APInt *AndC = isPowerOf2Constant(And->getOperand(1)); - if (!AndC) + ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); + if (!AndC || !AndC->getAPIntValue().isPowerOf2()) return SDValue(); SDValue X = And->getOperand(0); @@ -11976,7 +11836,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue V = Y; SDLoc dl(X); EVT VT = X.getValueType(); - unsigned BitInX = AndC->logBase2(); + unsigned BitInX = AndC->getAPIntValue().logBase2(); if (BitInX != 0) { // We must shift X first. @@ -12137,7 +11997,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::ADDC: - case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); + case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); @@ -12833,17 +12693,10 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case ARMISD::ADDE: case ARMISD::SUBC: case ARMISD::SUBE: - // Special cases when we convert a carry to a boolean. - if (Op.getResNo() == 0) { - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - // (ADDE 0, 0, C) will give us a single bit. 
- if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) && - isNullConstant(RHS)) { - Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); - return; - } - } + // These nodes' second result is a boolean + if (Op.getResNo() == 0) + break; + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index a448cb6a2a6..40cf54586af 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -625,8 +625,7 @@ class InstrItineraryData; SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/ARM/intrinsics-overflow.ll b/test/CodeGen/ARM/intrinsics-overflow.ll index af555d2240c..af3dd9dd411 100644 --- a/test/CodeGen/ARM/intrinsics-overflow.ll +++ b/test/CodeGen/ARM/intrinsics-overflow.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -mtriple=arm-linux -mcpu=generic -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ARM -; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV6 -; RUN: llc < %s -mtriple=thumbv7-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV7 +; RUN: llc < %s -mtriple=arm-linux -mcpu=generic | FileCheck %s define i32 @uadd_overflow(i32 %a, i32 %b) #0 { %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 
%b) @@ -9,19 +7,10 @@ define i32 @uadd_overflow(i32 %a, i32 %b) #0 { ret i32 %2 ; CHECK-LABEL: uadd_overflow: - - ; ARM: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; ARM: mov r[[R2:[0-9]+]], #0 - ; ARM: adc r[[R0]], r[[R2]], #0 - - ; THUMBV6: movs r[[R2:[0-9]+]], #0 - ; THUMBV6: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV6: adcs r[[R2]], r[[R2]] - ; THUMBV6: mov r[[R0]], r[[R2]] - - ; THUMBV7: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV7: mov.w r[[R2:[0-9]+]], #0 - ; THUMBV7: adc r[[R0]], r[[R2]], #0 + ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] + ; CHECK: mov r[[R1]], #1 + ; CHECK: cmp r[[R2]], r[[R0]] + ; CHECK: movhs r[[R1]], #0 } @@ -32,26 +21,10 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 { ret i32 %2 ; CHECK-LABEL: sadd_overflow: - - ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] - ; ARM: mov r[[R1]], #1 - ; ARM: cmp r[[R2]], r[[R0]] - ; ARM: movvc r[[R1]], #0 - - ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]] - ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]] - ; THUMBV6: movs r[[R0]], #0 - ; THUMBV6: movs r[[R1]], #1 - ; THUMBV6: cmp r[[R3]], r[[R2]] - ; THUMBV6: bvc .L[[LABEL:.*]] - ; THUMBV6: mov r[[R0]], r[[R1]] - ; THUMBV6: .L[[LABEL]]: - - ; THUMBV7: movs r[[R1]], #1 - ; THUMBV7: cmp r[[R2]], r[[R0]] - ; THUMBV7: it vc - ; THUMBV7: movvc r[[R1]], #0 - ; THUMBV7: mov r[[R0]], r[[R1]] + ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] + ; CHECK: mov r[[R1]], #1 + ; CHECK: cmp r[[R2]], r[[R0]] + ; CHECK: movvc r[[R1]], #0 } define i32 @usub_overflow(i32 %a, i32 %b) #0 { @@ -61,26 +34,9 @@ define i32 @usub_overflow(i32 %a, i32 %b) #0 { ret i32 %2 ; CHECK-LABEL: usub_overflow: - - ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; ARM: mov r[[R2:[0-9]+]], #0 - ; ARM: adc r[[R0]], r[[R2]], #0 - ; ARM: rsb r[[R0]], r[[R0]], #1 - - ; THUMBV6: movs r[[R2:[0-9]+]], #0 - ; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV6: adcs r[[R2]], r[[R2]] - ; THUMBV6: movs 
r[[R0]], #1 - ; THUMBV6: subs r[[R0]], r[[R0]], r[[R2]] - - ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV7: mov.w r[[R2:[0-9]+]], #0 - ; THUMBV7: adc r[[R0]], r[[R2]], #0 - ; THUMBV7: rsb.w r[[R0]], r[[R0]], #1 - - ; We should know that the overflow is just 1 bit, - ; no need to clear any other bit - ; CHECK-NOT: and + ; CHECK: mov r[[R2]], #1 + ; CHECK: cmp r[[R0]], r[[R1]] + ; CHECK: movhs r[[R2]], #0 } define i32 @ssub_overflow(i32 %a, i32 %b) #0 { @@ -90,23 +46,9 @@ define i32 @ssub_overflow(i32 %a, i32 %b) #0 { ret i32 %2 ; CHECK-LABEL: ssub_overflow: - - ; ARM: mov r[[R2]], #1 - ; ARM: cmp r[[R0]], r[[R1]] - ; ARM: movvc r[[R2]], #0 - - ; THUMBV6: movs r[[R0]], #0 - ; THUMBV6: movs r[[R3:[0-9]+]], #1 - ; THUMBV6: cmp r[[R2]], r[[R1:[0-9]+]] - ; THUMBV6: bvc .L[[LABEL:.*]] - ; THUMBV6: mov r[[R0]], r[[R3]] - ; THUMBV6: .L[[LABEL]]: - - ; THUMBV7: movs r[[R2:[0-9]+]], #1 - ; THUMBV7: cmp r[[R0:[0-9]+]], r[[R1:[0-9]+]] - ; THUMBV7: it vc - ; THUMBV7: movvc r[[R2]], #0 - ; THUMBV7: mov r[[R0]], r[[R2]] + ; CHECK: mov r[[R2]], #1 + ; CHECK: cmp r[[R0]], r[[R1]] + ; CHECK: movvc r[[R2]], #0 } declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 diff --git a/test/CodeGen/ARM/pr34045.ll b/test/CodeGen/ARM/pr34045.ll deleted file mode 100644 index 5d52bfe591b..00000000000 --- a/test/CodeGen/ARM/pr34045.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: llc < %s -mtriple thumbv7 | FileCheck %s - -; ModuleID = 'bugpoint-reduced-simplified.bc' -define hidden void @bn_mul_comba8(i32* nocapture %r, i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr { -entry: -; This test is actually checking that no cycle is introduced but at least we -; want to see a couple of umull and one umlal in the output -; CHECK: umull -; CHECK: umull -; CHECK: umlal - %0 = load i32, i32* %a, align 4 - %conv = zext i32 %0 to i64 - %1 = load i32, i32* %b, align 4 - %conv2 = zext i32 %1 to i64 - %mul = mul nuw i64 %conv2, %conv - %shr = lshr i64 %mul, 32 - 
%2 = load i32, i32* %a, align 4 - %conv13 = zext i32 %2 to i64 - %3 = load i32, i32* undef, align 4 - %conv15 = zext i32 %3 to i64 - %mul16 = mul nuw i64 %conv15, %conv13 - %add18 = add i64 %mul16, %shr - %shr20 = lshr i64 %add18, 32 - %conv21 = trunc i64 %shr20 to i32 - %4 = load i32, i32* undef, align 4 - %conv34 = zext i32 %4 to i64 - %5 = load i32, i32* %b, align 4 - %conv36 = zext i32 %5 to i64 - %mul37 = mul nuw i64 %conv36, %conv34 - %conv38 = and i64 %add18, 4294967295 - %add39 = add i64 %mul37, %conv38 - %shr41 = lshr i64 %add39, 32 - %conv42 = trunc i64 %shr41 to i32 - %add43 = add i32 %conv42, %conv21 - %cmp44 = icmp ult i32 %add43, %conv42 - %c1.1 = zext i1 %cmp44 to i32 - %add65 = add i32 0, %c1.1 - %add86 = add i32 %add65, 0 - %add107 = add i32 %add86, 0 - %conv124 = zext i32 %add107 to i64 - %add125 = add i64 0, %conv124 - %conv145 = and i64 %add125, 4294967295 - %add146 = add i64 %conv145, 0 - %conv166 = and i64 %add146, 4294967295 - %add167 = add i64 %conv166, 0 - %conv187 = and i64 %add167, 4294967295 - %add188 = add i64 %conv187, 0 - %conv189 = trunc i64 %add188 to i32 - %arrayidx200 = getelementptr inbounds i32, i32* %r, i32 3 - store i32 %conv189, i32* %arrayidx200, align 4 - ret void -} -