mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
Do not fold reload into an instruction with multiple uses. It issues one extra load.
llvm-svn: 44467
This commit is contained in:
parent
c75545d4a8
commit
90c548af8e
@ -274,8 +274,9 @@ namespace llvm {
|
||||
/// MI. If it is successul, MI is updated with the newly created MI and
|
||||
/// returns true.
|
||||
bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm,
|
||||
MachineInstr *DefMI, unsigned index, unsigned i,
|
||||
bool isSS, int slot, unsigned reg);
|
||||
MachineInstr *DefMI, unsigned InstrIdx,
|
||||
unsigned OpIdx, unsigned NumUses,
|
||||
bool isSS, int Slot, unsigned Reg);
|
||||
|
||||
/// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified
|
||||
/// VNInfo that's after the specified index but is within the basic block.
|
||||
|
@ -642,13 +642,22 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
|
||||
/// MI. If it is successul, MI is updated with the newly created MI and
|
||||
/// returns true.
|
||||
bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
|
||||
VirtRegMap &vrm,
|
||||
MachineInstr *DefMI,
|
||||
unsigned index, unsigned i,
|
||||
bool isSS, int slot, unsigned reg) {
|
||||
VirtRegMap &vrm, MachineInstr *DefMI,
|
||||
unsigned InstrIdx, unsigned OpIdx,
|
||||
unsigned NumUses,
|
||||
bool isSS, int Slot, unsigned Reg) {
|
||||
// FIXME: fold subreg use
|
||||
if (MI->getOperand(OpIdx).getSubReg())
|
||||
return false;
|
||||
|
||||
// FIXME: It may be possible to fold load when there are multiple uses.
|
||||
// e.g. On x86, TEST32rr r, r -> CMP32rm [mem], 0
|
||||
if (NumUses > 1)
|
||||
return false;
|
||||
|
||||
MachineInstr *fmi = isSS
|
||||
? mri_->foldMemoryOperand(MI, i, slot)
|
||||
: mri_->foldMemoryOperand(MI, i, DefMI);
|
||||
? mri_->foldMemoryOperand(MI, OpIdx, Slot)
|
||||
: mri_->foldMemoryOperand(MI, OpIdx, DefMI);
|
||||
if (fmi) {
|
||||
// Attempt to fold the memory reference into the instruction. If
|
||||
// we can do this, we don't need to insert spill code.
|
||||
@ -657,15 +666,13 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
|
||||
else
|
||||
LiveVariables::transferKillDeadInfo(MI, fmi, mri_);
|
||||
MachineBasicBlock &MBB = *MI->getParent();
|
||||
if (isSS) {
|
||||
if (!mf_->getFrameInfo()->isFixedObjectIndex(slot))
|
||||
vrm.virtFolded(reg, MI, i, fmi);
|
||||
}
|
||||
if (isSS && !mf_->getFrameInfo()->isFixedObjectIndex(Slot))
|
||||
vrm.virtFolded(Reg, MI, OpIdx, fmi);
|
||||
vrm.transferSpillPts(MI, fmi);
|
||||
vrm.transferRestorePts(MI, fmi);
|
||||
mi2iMap_.erase(MI);
|
||||
i2miMap_[index/InstrSlots::NUM] = fmi;
|
||||
mi2iMap_[fmi] = index;
|
||||
i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
|
||||
mi2iMap_[fmi] = InstrIdx;
|
||||
MI = MBB.insert(MBB.erase(MI), fmi);
|
||||
++numFolds;
|
||||
return true;
|
||||
@ -714,8 +721,6 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
|
||||
unsigned RegI = Reg;
|
||||
if (Reg == 0 || MRegisterInfo::isPhysicalRegister(Reg))
|
||||
continue;
|
||||
unsigned SubIdx = mop.getSubReg();
|
||||
bool isSubReg = SubIdx != 0;
|
||||
if (Reg != li.reg)
|
||||
continue;
|
||||
|
||||
@ -726,6 +731,8 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
|
||||
// If this is the rematerializable definition MI itself and
|
||||
// all of its uses are rematerialized, simply delete it.
|
||||
if (MI == ReMatOrigDefMI && CanDelete) {
|
||||
DOUT << "\t\t\t\tErasing re-materlizable def: ";
|
||||
DOUT << MI << '\n';
|
||||
RemoveMachineInstrFromMaps(MI);
|
||||
vrm.RemoveMachineInstrFromMaps(MI);
|
||||
MI->eraseFromParent();
|
||||
@ -747,23 +754,6 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
|
||||
if (TryFold)
|
||||
TryFold = !TrySplit && NewVReg == 0;
|
||||
|
||||
// FIXME: fold subreg use
|
||||
if (!isSubReg && TryFold &&
|
||||
tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, i, FoldSS, FoldSlot,
|
||||
Reg))
|
||||
// Folding the load/store can completely change the instruction in
|
||||
// unpredictable ways, rescan it from the beginning.
|
||||
goto RestartInstruction;
|
||||
|
||||
// Create a new virtual register for the spill interval.
|
||||
bool CreatedNewVReg = false;
|
||||
if (NewVReg == 0) {
|
||||
NewVReg = RegMap->createVirtualRegister(rc);
|
||||
vrm.grow();
|
||||
CreatedNewVReg = true;
|
||||
}
|
||||
mop.setReg(NewVReg);
|
||||
|
||||
// Scan all of the operands of this instruction rewriting operands
|
||||
// to use NewVReg instead of li.reg as appropriate. We do this for
|
||||
// two reasons:
|
||||
@ -775,9 +765,11 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
|
||||
//
|
||||
// Keep track of whether we replace a use and/or def so that we can
|
||||
// create the spill interval with the appropriate range.
|
||||
|
||||
|
||||
HasUse = mop.isUse();
|
||||
HasDef = mop.isDef();
|
||||
unsigned NumUses = HasUse;
|
||||
std::vector<unsigned> UpdateOps;
|
||||
for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
|
||||
if (!MI->getOperand(j).isRegister())
|
||||
continue;
|
||||
@ -785,12 +777,37 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
|
||||
if (RegJ == 0 || MRegisterInfo::isPhysicalRegister(RegJ))
|
||||
continue;
|
||||
if (RegJ == RegI) {
|
||||
MI->getOperand(j).setReg(NewVReg);
|
||||
UpdateOps.push_back(j);
|
||||
if (MI->getOperand(j).isUse())
|
||||
++NumUses;
|
||||
HasUse |= MI->getOperand(j).isUse();
|
||||
HasDef |= MI->getOperand(j).isDef();
|
||||
}
|
||||
}
|
||||
|
||||
if (TryFold &&
|
||||
tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, i,
|
||||
NumUses, FoldSS, FoldSlot, Reg)) {
|
||||
// Folding the load/store can completely change the instruction in
|
||||
// unpredictable ways, rescan it from the beginning.
|
||||
HasUse = false;
|
||||
HasDef = false;
|
||||
goto RestartInstruction;
|
||||
}
|
||||
|
||||
// Create a new virtual register for the spill interval.
|
||||
bool CreatedNewVReg = false;
|
||||
if (NewVReg == 0) {
|
||||
NewVReg = RegMap->createVirtualRegister(rc);
|
||||
vrm.grow();
|
||||
CreatedNewVReg = true;
|
||||
}
|
||||
mop.setReg(NewVReg);
|
||||
|
||||
// Reuse NewVReg for other reads.
|
||||
for (unsigned j = 0, e = UpdateOps.size(); j != e; ++j)
|
||||
MI->getOperand(UpdateOps[j]).setReg(NewVReg);
|
||||
|
||||
if (CreatedNewVReg) {
|
||||
if (DefIsReMat) {
|
||||
vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/);
|
||||
@ -1197,46 +1214,44 @@ addIntervalsForSpills(const LiveInterval &li,
|
||||
for (unsigned i = 0, e = spills.size(); i != e; ++i) {
|
||||
int index = spills[i].index;
|
||||
unsigned VReg = spills[i].vreg;
|
||||
bool DoFold = spills[i].canFold;
|
||||
bool isReMat = vrm.isReMaterialized(VReg);
|
||||
MachineInstr *MI = getInstructionFromIndex(index);
|
||||
int OpIdx = -1;
|
||||
bool FoldedLoad = false;
|
||||
if (DoFold) {
|
||||
unsigned NumUses = 0;
|
||||
if (spills[i].canFold) {
|
||||
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
|
||||
MachineOperand &MO = MI->getOperand(j);
|
||||
if (!MO.isRegister() || MO.getReg() != VReg)
|
||||
continue;
|
||||
if (MO.isUse()) {
|
||||
// Can't fold if it's two-address code and the use isn't the
|
||||
// first and only use.
|
||||
// If there are more than one uses, a load is still needed.
|
||||
if (!isReMat && !FoldedLoad &&
|
||||
alsoFoldARestore(Id, index,VReg,RestoreMBBs,RestoreIdxes)) {
|
||||
FoldedLoad = true;
|
||||
continue;
|
||||
} else {
|
||||
OpIdx = -1;
|
||||
break;
|
||||
}
|
||||
if (MO.isDef()) {
|
||||
OpIdx = (int)j;
|
||||
continue;
|
||||
}
|
||||
OpIdx = (int)j;
|
||||
// Can't fold if it's two-address code and the use isn't the
|
||||
// first and only use.
|
||||
if (isReMat ||
|
||||
(NumUses == 0 && !alsoFoldARestore(Id, index, VReg, RestoreMBBs,
|
||||
RestoreIdxes))) {
|
||||
OpIdx = -1;
|
||||
break;
|
||||
}
|
||||
++NumUses;
|
||||
}
|
||||
}
|
||||
// Fold the store into the def if possible.
|
||||
if (OpIdx == -1)
|
||||
DoFold = false;
|
||||
if (DoFold) {
|
||||
if (tryFoldMemoryOperand(MI, vrm, NULL, index,OpIdx,true,Slot,VReg)) {
|
||||
if (FoldedLoad)
|
||||
bool Folded = false;
|
||||
if (OpIdx != -1) {
|
||||
if (tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, NumUses,
|
||||
true, Slot, VReg)) {
|
||||
if (NumUses)
|
||||
// Folded a two-address instruction, do not issue a load.
|
||||
eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
|
||||
} else
|
||||
DoFold = false;
|
||||
Folded = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Else tell the spiller to issue a store for us.
|
||||
if (!DoFold)
|
||||
if (!Folded)
|
||||
vrm.addSpillPoint(VReg, MI);
|
||||
}
|
||||
Id = SpillMBBs.find_next(Id);
|
||||
@ -1251,32 +1266,30 @@ addIntervalsForSpills(const LiveInterval &li,
|
||||
if (index == -1)
|
||||
continue;
|
||||
unsigned VReg = restores[i].vreg;
|
||||
bool DoFold = restores[i].canFold;
|
||||
MachineInstr *MI = getInstructionFromIndex(index);
|
||||
unsigned NumUses = 0;
|
||||
int OpIdx = -1;
|
||||
if (DoFold) {
|
||||
if (restores[i].canFold) {
|
||||
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
|
||||
MachineOperand &MO = MI->getOperand(j);
|
||||
if (!MO.isRegister() || MO.getReg() != VReg)
|
||||
continue;
|
||||
if (MO.isDef()) {
|
||||
// Can't fold if it's two-address code.
|
||||
// Can't fold if it's two-address code and it hasn't already
|
||||
// been folded.
|
||||
OpIdx = -1;
|
||||
break;
|
||||
}
|
||||
if (OpIdx != -1) {
|
||||
// Multiple uses, do not fold!
|
||||
OpIdx = -1;
|
||||
break;
|
||||
}
|
||||
OpIdx = (int)j;
|
||||
if (NumUses == 0)
|
||||
// Use the first use index.
|
||||
OpIdx = (int)j;
|
||||
++NumUses;
|
||||
}
|
||||
}
|
||||
|
||||
// Fold the load into the use if possible.
|
||||
if (OpIdx == -1)
|
||||
DoFold = false;
|
||||
if (DoFold) {
|
||||
bool Folded = false;
|
||||
if (OpIdx != -1) {
|
||||
if (vrm.isReMaterialized(VReg)) {
|
||||
MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
|
||||
int LdSlot = 0;
|
||||
@ -1284,17 +1297,15 @@ addIntervalsForSpills(const LiveInterval &li,
|
||||
// If the rematerializable def is a load, also try to fold it.
|
||||
if (isLoadSS ||
|
||||
(ReMatDefMI->getInstrDescriptor()->Flags & M_LOAD_FLAG))
|
||||
DoFold = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, OpIdx,
|
||||
isLoadSS, LdSlot, VReg);
|
||||
else
|
||||
DoFold = false;
|
||||
Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, OpIdx,
|
||||
NumUses, isLoadSS, LdSlot, VReg);
|
||||
} else
|
||||
DoFold = tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx,
|
||||
Folded = tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, NumUses,
|
||||
true, Slot, VReg);
|
||||
}
|
||||
// If folding is not possible / failed, then tell the spiller to issue a
|
||||
// load / rematerialization for us.
|
||||
if (!DoFold)
|
||||
if (!Folded)
|
||||
vrm.addRestorePoint(VReg, MI);
|
||||
}
|
||||
Id = RestoreMBBs.find_next(Id);
|
||||
|
81
test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
Normal file
81
test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
Normal file
@ -0,0 +1,81 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
|
||||
; RUN: grep {1 .*folded into instructions}
|
||||
|
||||
declare fastcc void @rdft(i32, i32, double*, i32*, double*)
|
||||
|
||||
define fastcc void @mp_sqrt(i32 %n, i32 %radix, i32* %in, i32* %out, i32* %tmp1, i32* %tmp2, i32 %nfft, double* %tmp1fft, double* %tmp2fft, i32* %ip, double* %w) {
|
||||
entry:
|
||||
br label %bb.i5
|
||||
|
||||
bb.i5: ; preds = %bb.i5, %entry
|
||||
%nfft_init.0.i = phi i32 [ 1, %entry ], [ %tmp7.i3, %bb.i5 ] ; <i32> [#uses=1]
|
||||
%tmp7.i3 = shl i32 %nfft_init.0.i, 1 ; <i32> [#uses=2]
|
||||
br i1 false, label %bb.i5, label %mp_unexp_mp2d.exit.i
|
||||
|
||||
mp_unexp_mp2d.exit.i: ; preds = %bb.i5
|
||||
br i1 false, label %cond_next.i, label %cond_true.i
|
||||
|
||||
cond_true.i: ; preds = %mp_unexp_mp2d.exit.i
|
||||
ret void
|
||||
|
||||
cond_next.i: ; preds = %mp_unexp_mp2d.exit.i
|
||||
%tmp22.i = sdiv i32 0, 2 ; <i32> [#uses=2]
|
||||
br i1 false, label %cond_true29.i, label %cond_next36.i
|
||||
|
||||
cond_true29.i: ; preds = %cond_next.i
|
||||
ret void
|
||||
|
||||
cond_next36.i: ; preds = %cond_next.i
|
||||
store i32 %tmp22.i, i32* null, align 4
|
||||
%tmp8.i14.i = select i1 false, i32 1, i32 0 ; <i32> [#uses=1]
|
||||
br label %bb.i28.i
|
||||
|
||||
bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
|
||||
%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
|
||||
%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
|
||||
%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
|
||||
%tmp4.i19.i = icmp slt i32 %tmp1.i18.i, %radix ; <i1> [#uses=1]
|
||||
%x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0 ; <i32> [#uses=1]
|
||||
%tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=0]
|
||||
%tmp1213.i23.i = sitofp i32 %x.0.i21.i to double ; <double> [#uses=1]
|
||||
%tmp15.i24.i = sub double 0.000000e+00, %tmp1213.i23.i ; <double> [#uses=1]
|
||||
%tmp16.i25.i = mul double 0.000000e+00, %tmp15.i24.i ; <double> [#uses=1]
|
||||
%indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 1 ; <i32> [#uses=2]
|
||||
%exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i ; <i1> [#uses=1]
|
||||
br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i
|
||||
|
||||
mp_unexp_d2mp.exit29.i: ; preds = %bb.i28.i
|
||||
%tmp46.i = sub i32 0, %tmp22.i ; <i32> [#uses=1]
|
||||
store i32 %tmp46.i, i32* null, align 4
|
||||
br i1 false, label %bb.i.i, label %mp_sqrt_init.exit
|
||||
|
||||
bb.i.i: ; preds = %bb.i.i, %mp_unexp_d2mp.exit29.i
|
||||
br label %bb.i.i
|
||||
|
||||
mp_sqrt_init.exit: ; preds = %mp_unexp_d2mp.exit29.i
|
||||
tail call fastcc void @mp_mul_csqu( i32 0, double* %tmp1fft )
|
||||
tail call fastcc void @rdft( i32 0, i32 -1, double* null, i32* %ip, double* %w )
|
||||
tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 0, double* %tmp1fft, i32* %tmp2 )
|
||||
br i1 false, label %cond_false.i, label %cond_true36.i
|
||||
|
||||
cond_true36.i: ; preds = %mp_sqrt_init.exit
|
||||
ret void
|
||||
|
||||
cond_false.i: ; preds = %mp_sqrt_init.exit
|
||||
tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %out )
|
||||
tail call fastcc void @mp_add( i32 0, i32 %radix, i32* %tmp1, i32* %tmp2, i32* %tmp1 )
|
||||
tail call fastcc void @mp_sub( i32 0, i32 %radix, i32* %in, i32* %tmp2, i32* %tmp2 )
|
||||
tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %tmp1 )
|
||||
tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 %tmp7.i3, double* %tmp2fft, i32* %tmp2 )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare fastcc void @mp_add(i32, i32, i32*, i32*, i32*)
|
||||
|
||||
declare fastcc void @mp_sub(i32, i32, i32*, i32*, i32*)
|
||||
|
||||
declare fastcc void @mp_round(i32, i32, i32, i32*)
|
||||
|
||||
declare fastcc void @mp_mul_csqu(i32, double*)
|
||||
|
||||
declare fastcc void @mp_mul_d2i(i32, i32, i32, double*, i32*)
|
Loading…
Reference in New Issue
Block a user