
Revert "ARMLoadStoreOptimizer: Create LDRD/STRD on thumb2"

This reverts commit r241926. This caused http://llvm.org/PR24190

llvm-svn: 242735
Matthias Braun 2015-07-20 23:17:20 +00:00
parent 186006cfa8
commit e87c09c013
7 changed files with 45 additions and 132 deletions
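For context, the reverted change taught the ARM load/store optimizer to pair two adjacent 32-bit loads or stores into a single t2LDRDi8/t2STRDi8 when the immediate offset fit the encoding. Below is a minimal standalone sketch of that offset check, mirroring the isValidLSDoubleOffset helper that the diff below deletes:

#include <cstdlib>

// t2LDRDi8/t2STRDi8 encode an 8-bit immediate that the hardware scales by 4,
// so the byte offset must be a multiple of 4 and smaller than 256 * 4 = 1024.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = std::abs(Offset);
  return (Value % 4) == 0 && Value < 1024;
}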

View File

@@ -111,10 +111,6 @@ namespace {
       /// Index into the basic block where the merged instruction will be
       /// inserted. (See MemOpQueueEntry.Position)
       unsigned InsertPos;
-      /// Whether the instructions can be merged into a ldm/stm instruction.
-      bool CanMergeToLSMulti;
-      /// Whether the instructions can be merged into a ldrd/strd instruction.
-      bool CanMergeToLSDouble;
     };
     BumpPtrAllocator Allocator;
     SmallVector<const MergeCandidate*,4> Candidates;
@@ -126,14 +122,11 @@ namespace {
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base, unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg);
-    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
-        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
-        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
-        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
-    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
-        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
-        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
-        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
+    MachineInstr *MergeOps(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator InsertBefore, int Offset,
+                           unsigned Base, bool BaseKill, unsigned Opcode,
+                           ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
+                           ArrayRef<std::pair<unsigned, bool>> Regs);
     void FormCandidates(const MemOpQueue &MemOps);
     MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -562,10 +555,12 @@ static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
 
 /// Create and insert a LDM or STM with Base as base register and registers in
 /// Regs as the register operands that would be loaded / stored. It returns
 /// true if the transformation is done.
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
-    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
-    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
+MachineInstr *
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator InsertBefore, int Offset,
+                          unsigned Base, bool BaseKill, unsigned Opcode,
+                          ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
+                          ArrayRef<std::pair<unsigned, bool>> Regs) {
   unsigned NumRegs = Regs.size();
   assert(NumRegs > 1);
@@ -754,28 +749,6 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
   return MIB.getInstr();
 }
 
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
-    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
-    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
-  bool IsLoad = isi32Load(Opcode);
-  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
-  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
-
-  assert(Regs.size() == 2);
-  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
-                                    TII->get(LoadStoreOpcode));
-  if (IsLoad) {
-    MIB.addReg(Regs[0].first, RegState::Define)
-       .addReg(Regs[1].first, RegState::Define);
-  } else {
-    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
-       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
-  }
-  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
-  return MIB.getInstr();
-}
-
 /// Call MergeOps and update MemOps and merges accordingly on success.
 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
   const MachineInstr *First = Cand.Instrs.front();
@@ -826,12 +799,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
   unsigned PredReg = 0;
   ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
   DebugLoc DL = First->getDebugLoc();
-  MachineInstr *Merged = nullptr;
-  if (Cand.CanMergeToLSDouble)
-    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
-                                   Opcode, Pred, PredReg, DL, Regs);
-  if (!Merged && Cand.CanMergeToLSMulti)
-    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
-                                  Opcode, Pred, PredReg, DL, Regs);
+  MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
+                                  Opcode, Pred, PredReg, DL, Regs);
   if (!Merged)
     return nullptr;
@@ -893,13 +861,6 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
   return Merged;
 }
 
-static bool isValidLSDoubleOffset(int Offset) {
-  unsigned Value = abs(Offset);
-  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
-  // multiplied by 4.
-  return (Value % 4) == 0 && Value < 1024;
-}
-
 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
   const MachineInstr *FirstMI = MemOps[0].MI;
@@ -919,51 +880,29 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
     unsigned Latest = SIndex;
     unsigned Earliest = SIndex;
     unsigned Count = 1;
-    bool CanMergeToLSDouble =
-      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
-    // ARM errata 602117: LDRD with base in list may result in incorrect base
-    // register when interrupted or faulted.
-    if (STI->isCortexM3() && isi32Load(Opcode) &&
-        PReg == getLoadStoreBaseOp(*MI).getReg())
-      CanMergeToLSDouble = false;
-
-    bool CanMergeToLSMulti = true;
-    // On swift vldm/vstm starting with an odd register number as that needs
-    // more uops than single vldrs.
-    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
-      CanMergeToLSMulti = false;
-
-    // Merge following instructions where possible.
+    // Merge additional instructions fulfilling LDM/STM constraints.
     for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
       int NewOffset = MemOps[I].Offset;
       if (NewOffset != Offset + (int)Size)
         break;
       const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
       unsigned Reg = MO.getReg();
-      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
-
-      // See if the current load/store may be part of a multi load/store.
-      bool PartOfLSMulti = CanMergeToLSMulti;
-      if (PartOfLSMulti) {
-        // Cannot load from SP
-        if (Reg == ARM::SP)
-          PartOfLSMulti = false;
-        // Register numbers must be in ascending order.
-        else if (RegNum <= PRegNum)
-          PartOfLSMulti = false;
-        // For VFP / NEON load/store multiples, the registers must be
-        // consecutive and within the limit on the number of registers per
-        // instruction.
-        else if (!isNotVFP && RegNum != PRegNum+1)
-          PartOfLSMulti = false;
-      }
-      // See if the current load/store may be part of a double load/store.
-      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
-      if (!PartOfLSMulti && !PartOfLSDouble)
-        break;
-      CanMergeToLSMulti &= PartOfLSMulti;
-      CanMergeToLSDouble &= PartOfLSDouble;
+      if (Reg == ARM::SP)
+        break;
+      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
+      // Register numbers must be in ascending order.
+      if (RegNum <= PRegNum)
+        break;
+      // For VFP / NEON load/store multiples, the registers must be consecutive
+      // and within the limit on the number of registers per instruction.
+      if (!isNotVFP && RegNum != PRegNum+1)
+        break;
+      // On Swift we don't want vldm/vstm to start with a odd register num
+      // because Q register unaligned vldm/vstm need more uops.
+      if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
+        break;
 
       // Track MemOp with latest and earliest position (Positions are
       // counted in reverse).
       unsigned Position = MemOps[I].Position;
@@ -983,10 +922,6 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
     Candidate->LatestMIIdx = Latest - SIndex;
     Candidate->EarliestMIIdx = Earliest - SIndex;
     Candidate->InsertPos = MemOps[Latest].Position;
-    if (Count == 1)
-      CanMergeToLSMulti = CanMergeToLSDouble = false;
-    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
-    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
     Candidates.push_back(Candidate);
     // Continue after the chain.
     SIndex += Count;
@@ -1718,14 +1653,12 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   // Go through list of candidates and merge.
   bool Changed = false;
   for (const MergeCandidate *Candidate : Candidates) {
-    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
+    if (Candidate->Instrs.size() > 1) {
       MachineInstr *Merged = MergeOpsUpdate(*Candidate);
       // Merge preceding/trailing base inc/dec into the merged op.
       if (Merged) {
-        Changed = true;
-        unsigned Opcode = Merged->getOpcode();
-        if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
-          MergeBaseUpdateLSMultiple(Merged);
+        MergeBaseUpdateLSMultiple(Merged);
+        Changed = true;
       } else {
         for (MachineInstr *MI : Candidate->Instrs) {
           if (MergeBaseUpdateLoadStore(MI))

View File

@@ -25,7 +25,8 @@ entry:
 ;CHECK: push {r7, lr}
 ;CHECK: sub sp, #4
 ;CHECK: add r0, sp, #12
-;CHECK: strd r1, r2, [sp, #12]
+;CHECK: str r2, [sp, #16]
+;CHECK: str r1, [sp, #12]
 ;CHECK: bl fooUseStruct
   call void @fooUseStruct(%st_t* %p1)
   ret void

View File

@@ -28,7 +28,8 @@ define i32 @test_align8(i8*, [4 x i32]* byval align 8 %b) {
 ; CHECK: push {r4, r7, lr}
 ; CHECK: add r7, sp, #4
-; CHECK: strd r2, r3, [r7, #8]
+; CHECK-DAG: str r2, [r7, #8]
+; CHECK-DAG: str r3, [r7, #12]
 ; CHECK: ldr r0, [r7, #8]

View File

@@ -3,7 +3,6 @@
 ; rdar://6949835
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -check-prefix=CHECK
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY -check-prefix=CHECK
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT -check-prefix=CHECK
 
 ; Magic ARM pair hints works best with linearscan / fast.
@@ -111,25 +110,5 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: strd_spill_ldrd_reload:
-; A8: strd r1, r0, [sp]
-; M3: strd r1, r0, [sp]
-; BASIC: strd r1, r0, [sp]
-; GREEDY: strd r0, r1, [sp]
-; CHECK: @ InlineAsm Start
-; CHECK: @ InlineAsm End
-; A8: ldrd r2, r1, [sp]
-; M3: ldrd r2, r1, [sp]
-; BASIC: ldrd r2, r1, [sp]
-; GREEDY: ldrd r1, r2, [sp]
-; CHECK: bl{{x?}} _extfunc
-define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) {
-  ; force %v0 and %v1 to be spilled
-  call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{lr}"()
-  ; force the reloaded %v0, %v1 into different registers
-  call void @extfunc(i32 0, i32 %v0, i32 %v1, i32 7)
-  ret void
-}
-
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind

View File

@@ -4,7 +4,8 @@ define void @t1(i8* nocapture %c) nounwind optsize {
 entry:
 ; CHECK-LABEL: t1:
 ; CHECK: movs r1, #0
-; CHECK: strd r1, r1, [r0]
+; CHECK: str r1, [r0]
+; CHECK: str r1, [r0, #4]
 ; CHECK: str r1, [r0, #8]
   call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
   ret void

View File

@@ -5,20 +5,16 @@ target triple = "thumbv7--linux-gnueabi"
 
 declare i8* @llvm.returnaddress(i32)
 
-define i32* @wrong-t2stmia-size-reduction(i32* %addr, i32 %val0, i32 %val1) minsize {
+define i32* @wrong-t2stmia-size-reduction(i32* %addr, i32 %val0) minsize {
   store i32 %val0, i32* %addr
   %addr1 = getelementptr i32, i32* %addr, i32 1
-  %addr2 = getelementptr i32, i32* %addr, i32 2
   %lr = call i8* @llvm.returnaddress(i32 0)
   %lr32 = ptrtoint i8* %lr to i32
-  store i32 %val1, i32* %addr1
-  store i32 %lr32, i32* %addr2
-
-  %addr3 = getelementptr i32, i32* %addr, i32 3
-  ret i32* %addr3
+  store i32 %lr32, i32* %addr1
+  %addr2 = getelementptr i32, i32* %addr1, i32 1
+  ret i32* %addr2
 }
 
-; Check that stm writes three registers. The bug caused one of registers (LR,
+; Check that stm writes two registers. The bug caused one of registers (LR,
 ; which invalid for Thumb1 form of STMIA instruction) to be dropped.
-; CHECK-LABEL: wrong-t2stmia-size-reduction:
-; CHECK: stm{{[^,]*}}, {{{.*,.*,.*}}}
+; CHECK: stm{{[^,]*}}, {{{.*,.*}}}

View File

@@ -33,7 +33,8 @@ define float @float_on_stack(double %a, double %b, double %c, double %d, double
 define double @double_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) {
 ; CHECK-LABEL: double_on_stack:
-; SOFT: ldrd r0, r1, [sp, #48]
+; SOFT: ldr r0, [sp, #48]
+; SOFT: ldr r1, [sp, #52]
 ; HARD: vldr d0, [sp]
 ; CHECK-NEXT: bx lr
   ret double %i
@@ -41,7 +42,8 @@ define double @double_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) {
 define double @double_not_split(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i) {
 ; CHECK-LABEL: double_not_split:
-; SOFT: ldrd r0, r1, [sp, #48]
+; SOFT: ldr r0, [sp, #48]
+; SOFT: ldr r1, [sp, #52]
 ; HARD: vldr d0, [sp]
 ; CHECK-NEXT: bx lr
   ret double %i