diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b01f2a75bc1..6dc0493c797 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -585,7 +585,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, int64_t Offset) const { ARMFunctionInfo *AFI = MBB->getParent()->getInfo(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : - (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri); MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" @@ -598,11 +598,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); - MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset)); + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(MIB); + AddDefaultCC(AddDefaultPred(MIB)); } void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 19bc2cab668..2d546dfd198 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2497,9 +2497,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { int FI = cast(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); if (Subtarget->isThumb1Only()) { - SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops); + return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? ARM::t2ADDri : ARM::ADDri); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d63557a34ab..7df13834055 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6571,9 +6571,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) - .addFrameIndex(FI) - .addImm(36)); // &jbuf[1] :: pc + BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) + .addFrameIndex(FI) + .addImm(36); // &jbuf[1] :: pc AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg5, RegState::Kill) diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 2c873ac9954..a86784432ad 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -360,6 +360,14 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), let DecoderMethod = "DecodeThumbAddSpecialReg"; } +// Thumb1 frame lowering is rather fragile, we hope to be able to use +// tADDrSPi, but we may need to insert a sequence that clobbers CPSR. +def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset), + NoItinerary, []>, + Requires<[IsThumb, IsThumb1Only]> { + let Defs = [CPSR]; +} + // ADD sp, sp, # def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), IIC_iALUi, "add", "\t$Rdn, $imm", []>, diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 291e97b0708..690db9cf2ae 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -196,6 +196,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Bytes &= ~3; ExtraOpc = ARM::tADDi3; } + DstNotEqBase = true; NumBits = 8; Scale = 4; Opc = ARM::tADDrSPi; @@ -241,6 +242,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg) .setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); + } else if (isARMLowRegister(DestReg) && BaseReg == ARM::SP && Bytes > 0) { + unsigned ThisVal = std::min(1020U, Bytes / 4 * 4); + Bytes -= ThisVal; + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal / 4)) + .setMIFlags(MIFlags); } else { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) .addReg(BaseReg, RegState::Kill)) @@ -294,32 +301,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, } } -/// emitThumbConstant - Emit a series of instructions to materialize a -/// constant. -static void emitThumbConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Imm, - const TargetInstrInfo &TII, - const Thumb1RegisterInfo& MRI, - DebugLoc dl) { - bool isSub = Imm < 0; - if (isSub) Imm = -Imm; - - int Chunk = (1 << 8) - 1; - int ThisVal = (Imm > Chunk) ? Chunk : Imm; - Imm -= ThisVal; - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), - DestReg)) - .addImm(ThisVal)); - if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); - if (isSub) { - const MCInstrDesc &MCID = TII.get(ARM::tRSB); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) - .addReg(DestReg, RegState::Kill)); - } -} - static void removeOperands(MachineInstr &MI, unsigned i) { unsigned Op = i; for (unsigned e = MI.getNumOperands(); i != e; ++i) @@ -352,85 +333,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - if (Opcode == ARM::tADDrSPi) { + if (Opcode == ARM::tADDframe) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); - - // Can't use tADDrSPi if it's based off the frame pointer. - unsigned NumBits = 0; - unsigned Scale = 1; - if (FrameReg != ARM::SP) { - Opcode = ARM::tADDi3; - NumBits = 3; - } else { - NumBits = 8; - Scale = 4; - } - - unsigned PredReg; - if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { - // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVr)); - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - // Remove offset - MI.RemoveOperand(FrameRegIdx+1); - return true; - } - - // Common case: small offset, fits into instruction. - unsigned Mask = (1 << NumBits) - 1; - if (Offset % Scale == 0 && ((Offset / Scale) & ~Mask) == 0) { - // Replace the FrameIndex with sp / fp - if (Opcode == ARM::tADDi3) { - MI.setDesc(TII.get(Opcode)); - removeOperands(MI, FrameRegIdx); - AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) - .addImm(Offset / Scale)); - } else { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale); - } - return true; - } - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Bytes = (Offset > 0) ? Offset : -Offset; - unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); - // MI would expand into a large number of instructions. Don't try to - // simplify the immediate. - if (NumMIs > 2) { - emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, - *this); - MBB.erase(II); - return true; - } - if (Offset > 0) { - // Translate r0 = add sp, imm to - // r0 = add sp, 255*4 - // r0 = add r0, (imm - 255*4) - if (Opcode == ARM::tADDi3) { - MI.setDesc(TII.get(Opcode)); - removeOperands(MI, FrameRegIdx); - AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); - } else { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask); - } - Offset = (Offset - Mask * Scale); - MachineBasicBlock::iterator NII = std::next(II); - MI.getOperand(0).setIsDead(false); - emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII, - *this); - } else { - // Translate r0 = add sp, -imm to - // r0 = -imm (this is then translated into a series of instructions) - // r0 = add r0, sp - emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); - - MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); - } + emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, + *this); + MBB.erase(II); return true; } else { if (AddrMode != ARMII::AddrModeT1_s) diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll index e07e8aab77a..19c5dbeceab 100644 --- a/test/CodeGen/ARM/thumb1-varalloc.ll +++ b/test/CodeGen/ARM/thumb1-varalloc.ll @@ -1,13 +1,13 @@ ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s -; rdar://8819685 @__bar = external hidden global i8* @__baz = external hidden global i8* +; rdar://8819685 define i8* @_foo() { entry: -; CHECK: foo: +; CHECK-LABEL: foo: %size = alloca i32, align 4 %0 = load i8** @__bar, align 4 @@ -40,3 +40,75 @@ bb3: declare noalias i8* @strdup(i8* nocapture) nounwind declare i32 @_called_func(i8*, i32*) nounwind + +; Variable ending up at unaligned offset from sp (i.e. not a multiple of 4) +define void @test_local_var_addr() { +; CHECK-LABEL: test_local_var_addr: + + %addr1 = alloca i8 + %addr2 = alloca i8 + +; CHECK: mov r0, sp +; CHECK: adds r0, r0, #{{[0-9]+}} +; CHECK: blx _take_ptr + call void @take_ptr(i8* %addr1) + +; CHECK: mov r0, sp +; CHECK: adds r0, r0, #{{[0-9]+}} +; CHECK: blx _take_ptr + call void @take_ptr(i8* %addr2) + + ret void +} + +; Simple variable ending up *at* sp. +define void @test_simple_var() { +; CHECK-LABEL: test_simple_var: + + %addr32 = alloca i32 + %addr8 = bitcast i32* %addr32 to i8* + +; CHECK: mov r0, sp +; CHECK-NOT: adds r0 +; CHECK: blx _take_ptr + call void @take_ptr(i8* %addr8) + ret void +} + +; Simple variable ending up at aligned offset from sp. +define void @test_local_var_addr_aligned() { +; CHECK-LABEL: test_local_var_addr_aligned: + + %addr1.32 = alloca i32 + %addr1 = bitcast i32* %addr1.32 to i8* + %addr2.32 = alloca i32 + %addr2 = bitcast i32* %addr2.32 to i8* + +; CHECK: add r0, sp, #{{[0-9]+}} +; CHECK: blx _take_ptr + call void @take_ptr(i8* %addr1) + +; CHECK: mov r0, sp +; CHECK-NOT: add r0 +; CHECK: blx _take_ptr + call void @take_ptr(i8* %addr2) + + ret void +} + +; Simple variable ending up at aligned offset from sp. +define void @test_local_var_big_offset() { +; CHECK-LABEL: test_local_var_big_offset: + %addr1.32 = alloca i32, i32 257 + %addr1 = bitcast i32* %addr1.32 to i8* + %addr2.32 = alloca i32, i32 257 + +; CHECK: add [[RTMP:r[0-9]+]], sp, #1020 +; CHECL: add r0, [[RTMP]], #8 +; CHECK: blx _take_ptr + call void @take_ptr(i8* %addr1) + + ret void +} + +declare void @take_ptr(i8*)