diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 1ae7e3b5ca6..418994dd2cd 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -666,32 +666,29 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isPredicated = TII->isPredicated(I); bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); - if (!isCondBr) { - if (!isPredicated) { - BBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, - &ExtraPredCost); - if (NumCycles > 1) - BBI.ExtraCost += NumCycles-1; - BBI.ExtraCost2 += ExtraPredCost; - } else if (!AlreadyPredicated) { - // FIXME: This instruction is already predicated before the - // if-conversion pass. It's probably something like a conditional move. - // Mark this block unpredicable for now. - BBI.IsUnpredicable = true; - return; - } + // A conditional branch is not predicable, but it may be eliminated. + if (isCondBr) + continue; + + if (!isPredicated) { + BBI.NonPredSize++; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, + &ExtraPredCost); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; } if (BBI.ClobbersPred && !isPredicated) { // Predicate modification instruction should end the block (except for // already predicated instructions and end of block branches). - if (isCondBr) { - // A conditional branch is not predicable, but it may be eliminated. - continue; - } - // Predicate may have been modified, the subsequent (currently) // unpredicated instructions cannot be correctly predicated. BBI.IsUnpredicable = true; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 62e80632c74..696d039e74e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -513,6 +513,74 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, return Found; } +static bool isV8EligibleForIT(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + return false; + case ARM::tADC: + case ARM::tADDi3: + case ARM::tADDi8: + case ARM::tADDrSPi: + case ARM::tADDrr: + case ARM::tAND: + case ARM::tASRri: + case ARM::tASRrr: + case ARM::tBIC: + case ARM::tCMNz: + case ARM::tCMPi8: + case ARM::tCMPr: + case ARM::tEOR: + case ARM::tLDRBi: + case ARM::tLDRBr: + case ARM::tLDRHi: + case ARM::tLDRHr: + case ARM::tLDRSB: + case ARM::tLDRSH: + case ARM::tLDRi: + case ARM::tLDRr: + case ARM::tLDRspi: + case ARM::tLSLri: + case ARM::tLSLrr: + case ARM::tLSRri: + case ARM::tLSRrr: + case ARM::tMOVi8: + case ARM::tMUL: + case ARM::tMVN: + case ARM::tORR: + case ARM::tROR: + case ARM::tRSB: + case ARM::tSBC: + case ARM::tSTRBi: + case ARM::tSTRBr: + case ARM::tSTRHi: + case ARM::tSTRHr: + case ARM::tSTRi: + case ARM::tSTRr: + case ARM::tSTRspi: + case ARM::tSUBi3: + case ARM::tSUBi8: + case ARM::tSUBrr: + case ARM::tTST: + return true; +// there are some "conditionally deprecated" opcodes + case ARM::tADDspr: + return MI->getOperand(2).getReg() != ARM::PC; + case ARM::tADDrSP: + case ARM::tBX: + case ARM::tBLXr: + // ADD PC, SP and BLX PC were always unpredictable, + // now on top of it they're deprecated + return MI->getOperand(0).getReg() != ARM::PC; + case ARM::tADDhirr: + return MI->getOperand(0).getReg() != ARM::PC && + MI->getOperand(2).getReg() != ARM::PC; + case ARM::tCMPhir: + case ARM::tMOVr: + return MI->getOperand(0).getReg() != ARM::PC && + MI->getOperand(1).getReg() != ARM::PC; + } +} + /// isPredicable - Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. @@ -520,11 +588,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { if (!MI->isPredicable()) return false; - if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { - ARMFunctionInfo *AFI = - MI->getParent()->getParent()->getInfo(); - return AFI->isThumb2Function(); + ARMFunctionInfo *AFI = + MI->getParent()->getParent()->getInfo(); + + if (AFI->isThumb2Function()) { + if (getSubtarget().hasV8Ops()) + return isV8EligibleForIT(MI); + } else { // non-Thumb + if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; } + return true; } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 3cbb9a8185b..f3a74c7109e 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -254,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); - // If we're a thumb2 or not NEON function we were handled via isPredicable. + // If we're a thumb2 or not NEON function we'll be handled via isPredicable. if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || AFI->isThumb2Function()) - return false; + return MI->isPredicable(); for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) if (MCID.OpInfo[i].isPredicate()) @@ -278,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { // Do we use a predicate? or... // Are we NEON in ARM mode and have a predicate operand? If so, I know // we're not predicable but add it anyways. - if (TII.isPredicable(MI) || isARMNEONPred(MI)) + if (isARMNEONPred(MI)) AddDefaultPred(MIB); // Do we optionally set a predicate? Preds is size > 0 iff the predicate diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 1ba78e4a984..c78db54d8b0 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -196,8 +196,13 @@ bool ARMPassConfig::addPreSched2() { addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) + if (!getARMSubtarget().isThumb1Only()) { + // in v8, IfConversion depends on Thumb instruction widths + if (getARMSubtarget().hasV8Ops() && + !getARMSubtarget().prefers32BitThumb()) + addPass(createThumb2SizeReductionPass()); addPass(&IfConverterID); + } } if (getARMSubtarget().isThumb2()) addPass(createThumb2ITBlockPass()); diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index d8596d7993e..8f1ae2eaf8f 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -28,6 +28,7 @@ namespace { static char ID; Thumb2ITBlockPass() : MachineFunctionPass(ID) {} + bool hasV8Ops; const Thumb2InstrInfo *TII; const TargetRegisterInfo *TRI; ARMFunctionInfo *AFI; @@ -192,37 +193,41 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Form IT block. ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; - // Branches, including tricky ones like LDM_RET, need to end an IT - // block so check the instruction we just put in the block. - for (; MBBI != E && Pos && - (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) { - if (MBBI->isDebugValue()) - continue; - MachineInstr *NMI = &*MBBI; - MI = NMI; - - unsigned NPredReg = 0; - ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg); - if (NCC == CC || NCC == OCC) { - Mask |= (NCC & 1) << Pos; - // Add implicit use of ITSTATE. - NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/, - true/*isImp*/, false/*isKill*/)); - LastITMI = NMI; - } else { - if (NCC == ARMCC::AL && - MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) { - --MBBI; - MBB.remove(NMI); - MBB.insert(InsertPos, NMI); - ++NumMovedInsts; + // v8 IT blocks are limited to one conditional op: skip the loop + if (!hasV8Ops) { + // Branches, including tricky ones like LDM_RET, need to end an IT + // block so check the instruction we just put in the block. + for (; MBBI != E && Pos && + (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) { + if (MBBI->isDebugValue()) continue; + + MachineInstr *NMI = &*MBBI; + MI = NMI; + + unsigned NPredReg = 0; + ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg); + if (NCC == CC || NCC == OCC) { + Mask |= (NCC & 1) << Pos; + // Add implicit use of ITSTATE. + NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/, + true/*isImp*/, false/*isKill*/)); + LastITMI = NMI; + } else { + if (NCC == ARMCC::AL && + MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) { + --MBBI; + MBB.remove(NMI); + MBB.insert(InsertPos, NMI); + ++NumMovedInsts; + continue; + } + break; } - break; + TrackDefUses(NMI, Defs, Uses, TRI); + --Pos; } - TrackDefUses(NMI, Defs, Uses, TRI); - --Pos; } // Finalize IT mask. @@ -250,6 +255,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { AFI = Fn.getInfo(); TII = static_cast(TM.getInstrInfo()); TRI = TM.getRegisterInfo(); + hasV8Ops = TM.getSubtarget().hasV8Ops(); if (!AFI->isThumbFunction()) return false; diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll index 2eeebac9930..8bc8cb1d890 100644 --- a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll +++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=CHECK-V8 %s ; rdar://13782395 define i32 @t1(i32 %a, i32 %b, i8** %retaddr) { @@ -100,6 +101,27 @@ KBBlockZero.exit: ; preds = %bb2.i ; CHECK: [[LABEL]]: ; CHECK-NEXT: subs r0, r1, r0 ; CHECK-NEXT: bx lr + +; CHECK-V8-LABEL: wrapDistance: +; CHECK-V8: cmp r1, #59 +; CHECK-V8-NEXT: bgt +; CHECK-V8-NEXT: %if.then +; CHECK-V8-NEXT: subs r0, r2, #1 +; CHECK-V8-NEXT: bx lr +; CHECK-V8-NEXT: %if.else +; CHECK-V8-NEXT: subs [[REG:r[0-9]+]], #120 +; CHECK-V8-NEXT: cmp [[REG]], r1 +; CHECK-V8-NEXT: bge +; CHECK-V8-NEXT: %if.else +; CHECK-V8-NEXT: cmp r0, #119 +; CHECK-V8-NEXT: bgt +; CHECK-V8-NEXT: %if.then4 +; CHECK-V8-NEXT: adds r0, r1, #1 +; CHECK-V8-NEXT: bx lr +; CHECK-V8-NEXT: %if.end5 +; CHECK-V8-NEXT: subs r0, r1, r0 +; CHECK-V8-NEXT: bx lr + define i32 @wrapDistance(i32 %tx, i32 %sx, i32 %w) { entry: %cmp = icmp slt i32 %sx, 60 diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 07620700aed..88d797e8364 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s ; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s +; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=V8 %s ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified. @@ -39,6 +40,17 @@ tailrecurse: ; preds = %sw.bb, %entry br i1 %tst, label %sw.bb, label %tailrecurse.switch tailrecurse.switch: ; preds = %tailrecurse +; V8-LABEL: %tailrecurse.switch +; V8: cmp +; V8-NEXT: beq +; V8-NEXT: %tailrecurse.switch +; V8: cmp +; V8-NEXT: beq +; V8-NEXT: %tailrecurse.switch +; V8: cmp +; V8-NEXT: beq +; V8-NEXT: b +; The trailing space in the last line checks that the branch is unconditional switch i32 %and, label %sw.epilog [ i32 1, label %sw.bb i32 3, label %sw.bb6 @@ -73,6 +85,7 @@ sw.epilog: ; preds = %tailrecurse.switch ; ARM: bar ; THUMB: bar ; T2: bar +; V8-LABEL: bar: define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly { entry: %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0 @@ -81,22 +94,32 @@ entry: ; ARM: ands ; THUMB: ands ; T2: ands +; V8: ands +; V8-NEXT: beq %3 = and i32 %2, 112 %4 = icmp eq i32 %3, 0 br i1 %4, label %return, label %bb bb: ; preds = %entry +; V8-NEXT: %bb %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0 %6 = load i8* %5, align 1 %7 = zext i8 %6 to i32 ; ARM: andsne ; THUMB: ands ; T2: andsne +; V8: ands +; V8-NEXT: beq %8 = and i32 %7, 112 %9 = icmp eq i32 %8, 0 br i1 %9, label %return, label %bb2 bb2: ; preds = %bb +; V8-NEXT: %bb2 +; V8-NEXT: cmp +; V8-NEXT: it ne +; V8-NEXT: cmpne +; V8-NEXT: bne %10 = icmp eq i32 %3, 16 %11 = icmp eq i32 %8, 16 %or.cond = or i1 %10, %11 diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll index dbec1c6bf99..40f88075039 100644 --- a/test/CodeGen/ARM/fast-isel-select.ll +++ b/test/CodeGen/ARM/fast-isel-select.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv8-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i1 %c) nounwind readnone { entry: diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll index 99e84a6a625..1aeeb916e48 100644 --- a/test/CodeGen/ARM/indirectbr.ll +++ b/test/CodeGen/ARM/indirectbr.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -relocation-model=pic -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=ARM ; RUN: llc < %s -relocation-model=pic -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=THUMB ; RUN: llc < %s -relocation-model=static -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB2 +; RUN: llc < %s -relocation-model=static -mtriple=thumbv8-apple-darwin | FileCheck %s -check-prefix=THUMB2 @nextaddr = global i8* null ; [#uses=2] @C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1] @@ -48,14 +49,17 @@ L2: ; preds = %L3, %bb2 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; [#uses=1] +; ARM-LABEL: %L1 ; ARM: ldr [[R1:r[0-9]+]], LCPI ; ARM: add [[R1b:r[0-9]+]], pc, [[R1]] ; ARM: str [[R1b]] +; THUMB-LABEL: %L1 ; THUMB: ldr ; THUMB: add ; THUMB: ldr [[R2:r[0-9]+]], LCPI ; THUMB: add [[R2]], pc ; THUMB: str [[R2]] +; THUMB2-LABEL: %L1 ; THUMB2: ldr [[R2:r[0-9]+]], LCPI ; THUMB2-NEXT: str{{(.w)?}} [[R2]] store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll index a25352c0f03..47c5dccd6fe 100644 --- a/test/CodeGen/ARM/thumb2-it-block.ll +++ b/test/CodeGen/ARM/thumb2-it-block.ll @@ -1,14 +1,15 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s ; PR11107 define i32 @test(i32 %a, i32 %b) { entry: ; CHECK: cmp ; CHECK-NEXT: it mi -; CHECK-NEXT: rsbmi +; CHECK-NEXT: rsb{{s?}}mi ; CHECK-NEXT: cmp ; CHECK-NEXT: it mi -; CHECK-NEXT: rsbmi +; CHECK-NEXT: rsb{{s?}}mi %cmp1 = icmp slt i32 %a, 0 %sub1 = sub nsw i32 0, %a %abs1 = select i1 %cmp1, i32 %sub1, i32 %a diff --git a/test/CodeGen/Thumb2/v8_IT_1.ll b/test/CodeGen/Thumb2/v8_IT_1.ll new file mode 100644 index 00000000000..e33845db8cb --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_1.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=thumbv8 -mattr=+neon | FileCheck %s + +;CHECK-LABEL: select_s_v_v: +;CHECK: beq .LBB0_2 +;CHECK-NEXT: @ BB#1: +;CHECK-NEXT: vmov.i32 +;CHECK-NEXT: .LBB0_2: +;CHECK: bx +define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) { +entry: + %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1) + %and = and i32 %avail, 1 + %tobool = icmp eq i32 %and, 0 + %vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer + ret <16 x i8> %vld1. +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 ) + diff --git a/test/CodeGen/Thumb2/v8_IT_2.ll b/test/CodeGen/Thumb2/v8_IT_2.ll new file mode 100644 index 00000000000..fe88316d147 --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_2.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s + + %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } + +define fastcc i32 @CountTree(%struct.quad_struct* %tree) { +entry: +; CHECK-LABEL: CountTree: +; CHECK: bne +; CHECK: cmp +; CHECK: it eq +; CHECK: cmpeq +; CHECK: bne +; CHECK: mov +; CHECK: pop + br label %tailrecurse + +tailrecurse: ; preds = %bb, %entry + %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2] + %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp14 = icmp eq %struct.quad_struct* null, null ; [#uses=1] + %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; [#uses=1] + %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; [#uses=1] + %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; [#uses=1] + %bothcond = and i1 %tmp17, %tmp14 ; [#uses=1] + %bothcond1 = and i1 %bothcond, %tmp23 ; [#uses=1] + %bothcond2 = and i1 %bothcond1, %tmp29 ; [#uses=1] + br i1 %bothcond2, label %return, label %bb + +bb: ; preds = %tailrecurse + %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; [#uses=0] + br label %tailrecurse + +return: ; preds = %tailrecurse + ret i32 0 +} + diff --git a/test/CodeGen/Thumb2/v8_IT_3.ll b/test/CodeGen/Thumb2/v8_IT_3.ll new file mode 100644 index 00000000000..4d90891734d --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_3.ll @@ -0,0 +1,75 @@ +; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC + +%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* } +%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 } + +@FuncPtr = external hidden unnamed_addr global %struct.FF* +@.str1 = external hidden unnamed_addr constant [6 x i8], align 4 +@G = external unnamed_addr global i32 +@.str2 = external hidden unnamed_addr constant [58 x i8], align 4 +@.str3 = external hidden unnamed_addr constant [58 x i8], align 4 + +define i32 @test() nounwind optsize ssp { +entry: +; CHECK-LABEL: test: +; CHECK: push +; CHECK-NOT: push + %block_size = alloca i32, align 4 + %block_count = alloca i32, align 4 + %index_cache = alloca i32, align 4 + store i32 0, i32* %index_cache, align 4 + %tmp = load i32* @G, align 4 + %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind + switch i32 %tmp1, label %bb8 [ + i32 0, label %bb + i32 536870913, label %bb4 + i32 536870914, label %bb6 + ] + +bb: + %tmp2 = load i32* @G, align 4 + %tmp4 = icmp eq i32 %tmp2, 0 + br i1 %tmp4, label %bb1, label %bb8 + +bb1: +; CHECK: %bb6 +; CHECK: it eq +; CHECK-NEXT: ldreq +; CHECK-NEXT: it eq +; CHECK-NEXT: cmpeq +; CHECK: %bb1 + %tmp5 = load i32* %block_size, align 4 + %tmp6 = load i32* %block_count, align 4 + %tmp7 = call %struct.FF* @Get() nounwind + store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4 + %tmp10 = zext i32 %tmp6 to i64 + %tmp11 = zext i32 %tmp5 to i64 + %tmp12 = mul nsw i64 %tmp10, %tmp11 + %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind + br label %bb8 + +bb4: +; CHECK-PIC: cmp +; CHECK-PIC: cmp +; CHECK-PIC-NEXT: bne +; CHECK-PIC-NEXT: %bb4 +; CHECK-PIC-NEXT: movs +; CHECK-PIC-NEXT: add +; CHECK-PIC-NEXT: pop + ret i32 0 + +bb6: + ret i32 1 + +bb8: + ret i32 -1 +} + +declare i32 @printf(i8*, ...) + +declare %struct.FF* @Get() + +declare i32 @foo(i8*, i64, i32) + +declare i32 @bar(i32, i32, i32) diff --git a/test/CodeGen/Thumb2/v8_IT_4.ll b/test/CodeGen/Thumb2/v8_IT_4.ll new file mode 100644 index 00000000000..45c79f430f1 --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_4.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -mtriple=thumbv8-eabi -float-abi=hard | FileCheck %s +; RUN: llc < %s -mtriple=thumbv8-eabi -float-abi=hard -regalloc=basic | FileCheck %s + +%"struct.__gnu_cxx::__normal_iterator, std::allocator > >" = type { i8* } +%"struct.__gnu_cxx::new_allocator" = type <{ i8 }> +%"struct.std::basic_string,std::allocator >" = type { %"struct.__gnu_cxx::__normal_iterator, std::allocator > >" } +%"struct.std::basic_string,std::allocator >::_Rep" = type { %"struct.std::basic_string,std::allocator >::_Rep_base" } +%"struct.std::basic_string,std::allocator >::_Rep_base" = type { i32, i32, i32 } + + +define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string,std::allocator >"* %this, %"struct.std::basic_string,std::allocator >"* %__str) { +; CHECK-LABEL: _ZNKSs7compareERKSs: +; CHECK: cbnz r0, +; CHECK-NEXT: %bb +; CHECK-NEXT: sub{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}} +; CHECK-NEXT: %bb1 +; CHECK-NEXT: pop.w +entry: + %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %this) ; [#uses=3] + %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %__str) ; [#uses=3] + %2 = icmp ult i32 %1, %0 ; [#uses=1] + %3 = select i1 %2, i32 %1, i32 %0 ; [#uses=1] + %4 = tail call arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string,std::allocator >"* %this) ; [#uses=1] + %5 = tail call arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string,std::allocator >"* %__str) ; [#uses=1] + %6 = tail call arm_aapcs_vfpcc i32 @memcmp(i8* %4, i8* %5, i32 %3) nounwind readonly ; [#uses=2] + %7 = icmp eq i32 %6, 0 ; [#uses=1] + br i1 %7, label %bb, label %bb1 + +bb: ; preds = %entry + %8 = sub i32 %0, %1 ; [#uses=1] + ret i32 %8 + +bb1: ; preds = %entry + ret i32 %6 +} + +declare arm_aapcs_vfpcc i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly + +declare arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string,std::allocator >"* %this) + +declare arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string,std::allocator >"* %this) + +declare arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string,std::allocator >"* %this) diff --git a/test/CodeGen/Thumb2/v8_IT_5.ll b/test/CodeGen/Thumb2/v8_IT_5.ll new file mode 100644 index 00000000000..3866068806e --- /dev/null +++ b/test/CodeGen/Thumb2/v8_IT_5.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s +; CHECK: it ne +; CHECK-NEXT: cmpne +; CHECK-NEXT: beq +; CHECK: cmp +; CHECK-NEXT: beq +; CHECK-NEXT: %if.else163 +; CHECK-NEXT: mov.w +; CHECK-NEXT: b +; CHECK-NEXT: %if.else145 +; CHECK-NEXT: mov.w + +%struct.hc = type { i32, i32, i32, i32 } + +define i32 @t(i32 %type) optsize { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: + unreachable + +if.else: + br i1 undef, label %if.then15, label %if.else18 + +if.then15: + unreachable + +if.else18: + switch i32 %type, label %if.else173 [ + i32 3, label %if.then115 + i32 1, label %if.then102 + ] + +if.then102: + br i1 undef, label %cond.true10.i, label %t.exit + +cond.true10.i: + br label %t.exit + +t.exit: + unreachable + +if.then115: + br i1 undef, label %if.else163, label %if.else145 + +if.else145: + %call150 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34865152) optsize + br label %while.body172 + +if.else163: + %call168 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34078720) optsize + br label %while.body172 + +while.body172: + br label %while.body172 + +if.else173: + ret i32 -1 +} + +declare hidden fastcc %struct.hc* @foo(%struct.hc* nocapture, i32) nounwind optsize +