1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

ARM: rework Thumb1 frame index rewriting

The previous code had a few problems, motivating the choices here.

1. It could create instructions clobbering CPSR, but the incoming MachineInstr
   didn't reflect this. A potential source of corruption. This is why the patch
   has a new PseudoInst for before lowering.
2. Similarly, there was some code to handle the incoming instruction not being
   ARMCC::AL, but this would have caused massive problems if it was actually
   invoked when a complex offset needing more than one instruction was requested.
3. It wasn't designed to handle unaligned pointers (or offsets). These should
   probably be minimised anyway, but the code needs to deal with them properly
   regardless.
4. It had some rather dubious ad-hoc code to avoid calling
   emitThumbRegPlusImmediate, a function which should be designed to do precisely
   this job.

We seem to cover the common cases correctly now, and hopefully can enhance
emitThumbRegPlusImmediate to handle any extra optimisations we need to add in
future.

llvm-svn: 220236
This commit is contained in:
Tim Northover 2014-10-20 21:28:41 +00:00
parent 31013fe069
commit 7a41a526ce
6 changed files with 102 additions and 114 deletions

View File

@ -585,7 +585,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
int64_t Offset) const {
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
(AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
(AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri);
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@ -598,11 +598,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset));
MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset);
if (!AFI->isThumb1OnlyFunction())
AddDefaultCC(MIB);
AddDefaultCC(AddDefaultPred(MIB));
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,

View File

@ -2497,9 +2497,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
if (Subtarget->isThumb1Only()) {
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops);
return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
CurDAG->getTargetConstant(0, MVT::i32));
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);

View File

@ -6571,9 +6571,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3, RegState::Kill));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
.addFrameIndex(FI)
.addImm(36)); // &jbuf[1] :: pc
BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
.addFrameIndex(FI)
.addImm(36); // &jbuf[1] :: pc
AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
.addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg5, RegState::Kill)

View File

@ -360,6 +360,14 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm),
let DecoderMethod = "DecodeThumbAddSpecialReg";
}
// Thumb1 frame lowering is rather fragile, we hope to be able to use
// tADDrSPi, but we may need to insert a sequence that clobbers CPSR.
def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset),
NoItinerary, []>,
Requires<[IsThumb, IsThumb1Only]> {
let Defs = [CPSR];
}
// ADD sp, sp, #<imm7>
def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
IIC_iALUi, "add", "\t$Rdn, $imm", []>,

View File

@ -196,6 +196,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Bytes &= ~3;
ExtraOpc = ARM::tADDi3;
}
DstNotEqBase = true;
NumBits = 8;
Scale = 4;
Opc = ARM::tADDrSPi;
@ -241,6 +242,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)
.setMIFlags(MIFlags));
AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
} else if (isARMLowRegister(DestReg) && BaseReg == ARM::SP && Bytes > 0) {
unsigned ThisVal = std::min(1020U, Bytes / 4 * 4);
Bytes -= ThisVal;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg)
.addReg(BaseReg, RegState::Kill).addImm(ThisVal / 4))
.setMIFlags(MIFlags);
} else {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
.addReg(BaseReg, RegState::Kill))
@ -294,32 +301,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
}
}
/// emitThumbConstant - Emit a series of instructions to materialize a
/// constant.
static void emitThumbConstant(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned DestReg, int Imm,
const TargetInstrInfo &TII,
const Thumb1RegisterInfo& MRI,
DebugLoc dl) {
bool isSub = Imm < 0;
if (isSub) Imm = -Imm;
int Chunk = (1 << 8) - 1;
int ThisVal = (Imm > Chunk) ? Chunk : Imm;
Imm -= ThisVal;
AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8),
DestReg))
.addImm(ThisVal));
if (Imm > 0)
emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
if (isSub) {
const MCInstrDesc &MCID = TII.get(ARM::tRSB);
AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
.addReg(DestReg, RegState::Kill));
}
}
static void removeOperands(MachineInstr &MI, unsigned i) {
unsigned Op = i;
for (unsigned e = MI.getNumOperands(); i != e; ++i)
@ -352,85 +333,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
if (Opcode == ARM::tADDrSPi) {
if (Opcode == ARM::tADDframe) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
// Can't use tADDrSPi if it's based off the frame pointer.
unsigned NumBits = 0;
unsigned Scale = 1;
if (FrameReg != ARM::SP) {
Opcode = ARM::tADDi3;
NumBits = 3;
} else {
NumBits = 8;
Scale = 4;
}
unsigned PredReg;
if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
// Turn it into a move.
MI.setDesc(TII.get(ARM::tMOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset
MI.RemoveOperand(FrameRegIdx+1);
return true;
}
// Common case: small offset, fits into instruction.
unsigned Mask = (1 << NumBits) - 1;
if (Offset % Scale == 0 && ((Offset / Scale) & ~Mask) == 0) {
// Replace the FrameIndex with sp / fp
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
}
return true;
}
unsigned DestReg = MI.getOperand(0).getReg();
unsigned Bytes = (Offset > 0) ? Offset : -Offset;
unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
// MI would expand into a large number of instructions. Don't try to
// simplify the immediate.
if (NumMIs > 2) {
emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
*this);
MBB.erase(II);
return true;
}
if (Offset > 0) {
// Translate r0 = add sp, imm to
// r0 = add sp, 255*4
// r0 = add r0, (imm - 255*4)
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
}
Offset = (Offset - Mask * Scale);
MachineBasicBlock::iterator NII = std::next(II);
MI.getOperand(0).setIsDead(false);
emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
*this);
} else {
// Translate r0 = add sp, -imm to
// r0 = -imm (this is then translated into a series of instructions)
// r0 = add r0, sp
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
MI.setDesc(TII.get(ARM::tADDhirr));
MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
}
emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
*this);
MBB.erase(II);
return true;
} else {
if (AddrMode != ARMII::AddrModeT1_s)

View File

@ -1,13 +1,13 @@
; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
; rdar://8819685
@__bar = external hidden global i8*
@__baz = external hidden global i8*
; rdar://8819685
define i8* @_foo() {
entry:
; CHECK: foo:
; CHECK-LABEL: foo:
%size = alloca i32, align 4
%0 = load i8** @__bar, align 4
@ -40,3 +40,75 @@ bb3:
declare noalias i8* @strdup(i8* nocapture) nounwind
declare i32 @_called_func(i8*, i32*) nounwind
; Variable ending up at unaligned offset from sp (i.e. not a multiple of 4)
define void @test_local_var_addr() {
; CHECK-LABEL: test_local_var_addr:
%addr1 = alloca i8
%addr2 = alloca i8
; CHECK: mov r0, sp
; CHECK: adds r0, r0, #{{[0-9]+}}
; CHECK: blx _take_ptr
call void @take_ptr(i8* %addr1)
; CHECK: mov r0, sp
; CHECK: adds r0, r0, #{{[0-9]+}}
; CHECK: blx _take_ptr
call void @take_ptr(i8* %addr2)
ret void
}
; Simple variable ending up *at* sp.
define void @test_simple_var() {
; CHECK-LABEL: test_simple_var:
%addr32 = alloca i32
%addr8 = bitcast i32* %addr32 to i8*
; CHECK: mov r0, sp
; CHECK-NOT: adds r0
; CHECK: blx _take_ptr
call void @take_ptr(i8* %addr8)
ret void
}
; Simple variable ending up at aligned offset from sp.
define void @test_local_var_addr_aligned() {
; CHECK-LABEL: test_local_var_addr_aligned:
%addr1.32 = alloca i32
%addr1 = bitcast i32* %addr1.32 to i8*
%addr2.32 = alloca i32
%addr2 = bitcast i32* %addr2.32 to i8*
; CHECK: add r0, sp, #{{[0-9]+}}
; CHECK: blx _take_ptr
call void @take_ptr(i8* %addr1)
; CHECK: mov r0, sp
; CHECK-NOT: add r0
; CHECK: blx _take_ptr
call void @take_ptr(i8* %addr2)
ret void
}
; Simple variable ending up at aligned offset from sp.
define void @test_local_var_big_offset() {
; CHECK-LABEL: test_local_var_big_offset:
%addr1.32 = alloca i32, i32 257
%addr1 = bitcast i32* %addr1.32 to i8*
%addr2.32 = alloca i32, i32 257
; CHECK: add [[RTMP:r[0-9]+]], sp, #1020
; CHECL: add r0, [[RTMP]], #8
; CHECK: blx _take_ptr
call void @take_ptr(i8* %addr1)
ret void
}
declare void @take_ptr(i8*)