[X86] Account for partial stack slot spills (PR30821)
Previously, _any_ store or load instruction was considered to be operating on a spill if it had a frameindex as an operand, and thus was fair game for optimisations such as "StackSlotColoring". This usually works, except on architectures where spills can be partially restored, for example on X86, where a spilt vector can have a single component loaded (zeroing the rest of the target register). This can be mis-interpreted and the zero extension unsoundly eliminated; see pr30821.

To avoid this, this commit optionally provides callers of isLoadFromStackSlot and isStoreToStackSlot with the number of bytes spilt/loaded by the given instruction. Optimisations can then determine that a full spill followed by a partial load (or vice versa), for example, cannot necessarily be commuted.

Patch by Jeremy Morse!

Differential Revision: https://reviews.llvm.org/D44782

llvm-svn: 330778
Parent: 8800ca515f
Commit: d952afa83a
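As a sketch of how a caller can use the new overloads (illustrative only, not code from the patch; TII, LoadMI and StoreMI are assumed to be in scope as a TargetInstrInfo pointer and two MachineInstr references):

  // Hypothetical caller: only treat a reload/re-spill pair as a removable
  // round trip when the byte counts agree.
  int LoadFI = 0, StoreFI = 0;
  unsigned LoadSize = 0, StoreSize = 0;
  unsigned LoadReg = TII->isLoadFromStackSlot(LoadMI, LoadFI, LoadSize);
  unsigned StoreReg = TII->isStoreToStackSlot(StoreMI, StoreFI, StoreSize);
  // The pair is only removable if it targets the same slot and register AND
  // moves the same number of bytes: a partial reload (e.g. MOVSDrm of a spilt
  // vector) zeroes the upper lanes of the register, so a following full-width
  // store would write different slot contents.
  bool IsRoundTrip = LoadReg && LoadReg == StoreReg && LoadFI == StoreFI &&
                     LoadSize == StoreSize;

This is exactly the guard the StackSlotColoring change below adds.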
include/llvm/CodeGen/TargetInstrInfo.h
@@ -225,6 +225,17 @@ public:
    return 0;
  }

  /// Optional extension of isLoadFromStackSlot that returns the number of
  /// bytes loaded from the stack. This must be implemented if a backend
  /// supports partial stack slot spills/loads to further disambiguate
  /// what the load does.
  virtual unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                       int &FrameIndex,
                                       unsigned &MemBytes) const {
    MemBytes = 0;
    return isLoadFromStackSlot(MI, FrameIndex);
  }

  /// Check for post-frame ptr elimination stack locations as well.
  /// This uses a heuristic so it isn't reliable for correctness.
  virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI,
@@ -252,6 +263,17 @@ public:
    return 0;
  }

  /// Optional extension of isStoreToStackSlot that returns the number of
  /// bytes stored to the stack. This must be implemented if a backend
  /// supports partial stack slot spills/loads to further disambiguate
  /// what the store does.
  virtual unsigned isStoreToStackSlot(const MachineInstr &MI,
                                      int &FrameIndex,
                                      unsigned &MemBytes) const {
    MemBytes = 0;
    return isStoreToStackSlot(MI, FrameIndex);
  }

  /// Check for post-frame ptr elimination stack locations as well.
  /// This uses a heuristic, so it isn't reliable for correctness.
  virtual unsigned isStoreToStackSlotPostFE(const MachineInstr &MI,
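For a backend, implementing the extension amounts to reporting the access size alongside the usual frame-index check. The following is a minimal sketch under an invented target and opcode ("MyTarget::LDRX" is not real); the actual X86 implementation further below dispatches through an isFrameLoadOpcode table instead:

  unsigned MyTargetInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                                  int &FrameIndex,
                                                  unsigned &MemBytes) const {
    MemBytes = 0;
    if (MI.getOpcode() == MyTarget::LDRX && MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      MemBytes = 8; // This opcode always reads exactly eight bytes.
      return MI.getOperand(0).getReg();
    }
    return 0; // Not a load from a stack slot.
  }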
lib/CodeGen/StackSlotColoring.cpp
@@ -418,7 +418,9 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {

    unsigned LoadReg = 0;
    unsigned StoreReg = 0;
    if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
    unsigned LoadSize = 0;
    unsigned StoreSize = 0;
    if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS, LoadSize)))
      continue;
    // Skip the ...pseudo debugging... instructions between a load and store.
    while ((NextMI != E) && NextMI->isDebugValue()) {
@@ -426,9 +428,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
      ++I;
    }
    if (NextMI == E) continue;
    if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
    if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize)))
      continue;
    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 ||
        LoadSize != StoreSize)
      continue;
    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;

    ++NumDead;
    changed = true;
lib/Target/X86/X86InstrInfo.cpp
@@ -3939,24 +3939,40 @@ bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
  return false;
}

static bool isFrameLoadOpcode(int Opcode) {
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
  switch (Opcode) {
  default:
    return false;
  case X86::MOV8rm:
  case X86::KMOVBkm:
    MemBytes = 1;
    return true;
  case X86::MOV16rm:
  case X86::KMOVWkm:
    MemBytes = 2;
    return true;
  case X86::MOV32rm:
  case X86::MOVSSrm:
  case X86::VMOVSSZrm:
  case X86::KMOVDkm:
    MemBytes = 4;
    return true;
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::VMOVSSrm:
  case X86::VMOVSDZrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::KMOVQkm:
    MemBytes = 8;
    return true;
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVAPDrm:
  case X86::MOVUPDrm:
  case X86::MOVDQArm:
  case X86::MOVDQUrm:
  case X86::VMOVSSrm:
  case X86::VMOVSDrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPSrm:
@@ -3964,131 +3980,142 @@ static bool isFrameLoadOpcode(int Opcode) {
  case X86::VMOVUPDrm:
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVDQUYrm:
  case X86::VMOVDQAYrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::VMOVSSZrm:
  case X86::VMOVSDZrm:
  case X86::VMOVAPSZrm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVAPSZ128rm_NOVLX:
  case X86::VMOVAPSZ256rm_NOVLX:
  case X86::VMOVUPSZrm:
  case X86::VMOVUPSZ128rm:
  case X86::VMOVUPSZ256rm:
  case X86::VMOVAPSZ128rm_NOVLX:
  case X86::VMOVUPSZ128rm_NOVLX:
  case X86::VMOVUPSZ256rm_NOVLX:
  case X86::VMOVAPDZrm:
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPDZ256rm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU64Zrm:
  case X86::VMOVDQU64Z128rm:
  case X86::VMOVDQU64Z256rm:
  case X86::VMOVDQU8Zrm:
  case X86::VMOVDQU8Z128rm:
  case X86::VMOVDQU8Z256rm:
  case X86::VMOVDQU16Zrm:
  case X86::VMOVDQU16Z128rm:
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU64Z128rm:
    MemBytes = 16;
    return true;
  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPSZ256rm:
  case X86::VMOVAPSZ256rm_NOVLX:
  case X86::VMOVUPSZ256rm_NOVLX:
  case X86::VMOVAPDZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVDQU8Z256rm:
  case X86::VMOVDQU16Z256rm:
  case X86::KMOVBkm:
  case X86::KMOVWkm:
  case X86::KMOVDkm:
  case X86::KMOVQkm:
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU64Z256rm:
    MemBytes = 32;
    return true;
  case X86::VMOVAPSZrm:
  case X86::VMOVUPSZrm:
  case X86::VMOVAPDZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVDQU8Zrm:
  case X86::VMOVDQU16Zrm:
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU64Zrm:
    MemBytes = 64;
    return true;
  }
}

static bool isFrameStoreOpcode(int Opcode) {
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
  switch (Opcode) {
  default: break;
  default:
    return false;
  case X86::MOV8mr:
  case X86::KMOVBmk:
    MemBytes = 1;
    return true;
  case X86::MOV16mr:
  case X86::KMOVWmk:
    MemBytes = 2;
    return true;
  case X86::MOV32mr:
  case X86::MOVSSmr:
  case X86::VMOVSSmr:
  case X86::VMOVSSZmr:
  case X86::KMOVDmk:
    MemBytes = 4;
    return true;
  case X86::MOV64mr:
  case X86::ST_FpP64m:
  case X86::MOVSSmr:
  case X86::MOVSDmr:
  case X86::VMOVSDmr:
  case X86::VMOVSDZmr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
  case X86::KMOVQmk:
    MemBytes = 8;
    return true;
  case X86::MOVAPSmr:
  case X86::MOVUPSmr:
  case X86::MOVAPDmr:
  case X86::MOVUPDmr:
  case X86::MOVDQAmr:
  case X86::MOVDQUmr:
  case X86::VMOVSSmr:
  case X86::VMOVSDmr:
  case X86::VMOVAPSmr:
  case X86::VMOVUPSmr:
  case X86::VMOVAPDmr:
  case X86::VMOVUPDmr:
  case X86::VMOVDQAmr:
  case X86::VMOVDQUmr:
  case X86::VMOVUPSZ128mr:
  case X86::VMOVAPSZ128mr:
  case X86::VMOVUPSZ128mr_NOVLX:
  case X86::VMOVAPSZ128mr_NOVLX:
  case X86::VMOVUPDZ128mr:
  case X86::VMOVAPDZ128mr:
  case X86::VMOVDQA32Z128mr:
  case X86::VMOVDQU32Z128mr:
  case X86::VMOVDQA64Z128mr:
  case X86::VMOVDQU64Z128mr:
  case X86::VMOVDQU8Z128mr:
  case X86::VMOVDQU16Z128mr:
    MemBytes = 16;
    return true;
  case X86::VMOVUPSYmr:
  case X86::VMOVAPSYmr:
  case X86::VMOVUPDYmr:
  case X86::VMOVAPDYmr:
  case X86::VMOVDQUYmr:
  case X86::VMOVDQAYmr:
  case X86::VMOVSSZmr:
  case X86::VMOVSDZmr:
  case X86::VMOVUPSZmr:
  case X86::VMOVUPSZ128mr:
  case X86::VMOVUPSZ256mr:
  case X86::VMOVUPSZ128mr_NOVLX:
  case X86::VMOVUPSZ256mr_NOVLX:
  case X86::VMOVAPSZmr:
  case X86::VMOVAPSZ128mr:
  case X86::VMOVAPSZ256mr:
  case X86::VMOVAPSZ128mr_NOVLX:
  case X86::VMOVUPSZ256mr_NOVLX:
  case X86::VMOVAPSZ256mr_NOVLX:
  case X86::VMOVUPDZmr:
  case X86::VMOVUPDZ128mr:
  case X86::VMOVUPDZ256mr:
  case X86::VMOVAPDZmr:
  case X86::VMOVAPDZ128mr:
  case X86::VMOVAPDZ256mr:
  case X86::VMOVDQA32Zmr:
  case X86::VMOVDQA32Z128mr:
  case X86::VMOVDQA32Z256mr:
  case X86::VMOVDQU32Zmr:
  case X86::VMOVDQU32Z128mr:
  case X86::VMOVDQU32Z256mr:
  case X86::VMOVDQA64Zmr:
  case X86::VMOVDQA64Z128mr:
  case X86::VMOVDQA64Z256mr:
  case X86::VMOVDQU64Zmr:
  case X86::VMOVDQU64Z128mr:
  case X86::VMOVDQU64Z256mr:
  case X86::VMOVDQU8Zmr:
  case X86::VMOVDQU8Z128mr:
  case X86::VMOVDQU8Z256mr:
  case X86::VMOVDQU16Zmr:
  case X86::VMOVDQU16Z128mr:
  case X86::VMOVDQU16Z256mr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
  case X86::KMOVBmk:
  case X86::KMOVWmk:
  case X86::KMOVDmk:
  case X86::KMOVQmk:
  case X86::VMOVDQA32Z256mr:
  case X86::VMOVDQU32Z256mr:
  case X86::VMOVDQA64Z256mr:
  case X86::VMOVDQU64Z256mr:
    MemBytes = 32;
    return true;
  case X86::VMOVUPSZmr:
  case X86::VMOVAPSZmr:
  case X86::VMOVUPDZmr:
  case X86::VMOVAPDZmr:
  case X86::VMOVDQU8Zmr:
  case X86::VMOVDQU16Zmr:
  case X86::VMOVDQA32Zmr:
  case X86::VMOVDQU32Zmr:
  case X86::VMOVDQA64Zmr:
  case X86::VMOVDQU64Zmr:
    MemBytes = 64;
    return true;
  }
  return false;
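The effect of the new tables can be summarised with the two opcodes the test case below exercises. This is an illustrative sketch only (the helpers are file-local statics, so these are not real API calls):

  unsigned Bytes = 0;
  isFrameLoadOpcode(X86::MOVSDrm, Bytes);   // true, Bytes == 8  (scalar double)
  isFrameStoreOpcode(X86::MOVAPDmr, Bytes); // true, Bytes == 16 (full vector)
  // An 8-byte MOVSDrm reload of a 16-byte spill slot is now visibly partial,
  // so StackSlotColoring keeps the following 16-byte store.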
@@ -4096,7 +4123,14 @@ static bool isFrameStoreOpcode(int Opcode) {

unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
  if (isFrameLoadOpcode(MI.getOpcode()))
  unsigned Dummy;
  return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex,
                                           unsigned &MemBytes) const {
  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
    if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
      return MI.getOperand(0).getReg();
  return 0;
@@ -4104,7 +4138,8 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,

unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                 int &FrameIndex) const {
  if (isFrameLoadOpcode(MI.getOpcode())) {
  unsigned Dummy;
  if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
    unsigned Reg;
    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
      return Reg;
@@ -4117,7 +4152,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,

unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex) const {
  if (isFrameStoreOpcode(MI.getOpcode()))
  unsigned Dummy;
  return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
}

unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex,
                                          unsigned &MemBytes) const {
  if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
    if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
        isFrameOperand(MI, 0, FrameIndex))
      return MI.getOperand(X86::AddrNumOperands).getReg();
@@ -4126,7 +4168,8 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,

unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                int &FrameIndex) const {
  if (isFrameStoreOpcode(MI.getOpcode())) {
  unsigned Dummy;
  if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
    unsigned Reg;
    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
      return Reg;
lib/Target/X86/X86InstrInfo.h
@@ -238,6 +238,9 @@ public:

  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex,
                               unsigned &MemBytes) const override;
  /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
  /// stack locations as well. This uses a heuristic so it isn't
  /// reliable for correctness.
@@ -246,6 +249,9 @@ public:

  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex,
                              unsigned &MemBytes) const override;
  /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
  /// stack locations as well. This uses a heuristic so it isn't
  /// reliable for correctness.
test/CodeGen/X86/pr30821.mir (new file, 133 lines)
@@ -0,0 +1,133 @@
# RUN: llc -x mir < %s -run-pass=greedy,virtregrewriter,stack-slot-coloring | FileCheck %s
--- |
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"

  define dso_local i32 @main() local_unnamed_addr {
  entry:
    ; Dummy IR that just performs some allocas -- the machine IR function
    ; below is what this test is about.
    %alpha = alloca i8, align 1
    %foxtrot = alloca <2 x double>, align 16
    %india = alloca <2 x double>, align 16
    ret i32 0
  }

...
---
name: main
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
registers:
liveins:
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 16
  adjustsStack: false
  hasCalls: true
  stackProtector: ''
  maxCallFrameSize: 4294967295
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
  localFrameSize: 0
  savePoint: ''
  restorePoint: ''
fixedStack:
stack:
  - { id: 0, name: alpha, type: default, offset: 0, size: 1, alignment: 1,
      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
      di-variable: '', di-expression: '', di-location: '' }
  - { id: 1, name: foxtrot, type: default, offset: 0, size: 16, alignment: 16,
      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
      di-variable: '', di-expression: '', di-location: '' }
  - { id: 2, name: india, type: default, offset: 0, size: 16, alignment: 16,
      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
      di-variable: '', di-expression: '', di-location: '' }
constants:
body: |
  bb.0.entry:
    ; To trick stack-slot-colouring to run its dead-store-elimination phase,
    ; which is at fault, we need the register allocator to run, and spill in two
    ; places that can have their slots merged. Achieve this by volatile-loading
    ; data into $xmm[0-14] and volatile storing them later, leaving regalloc only
    ; $xmm15 to play with in the middle.
    ; Then, perform two virtreg load-and-store pairs, with the faulty code
    ; sequence in the middle (MOVSDrm then MOVAPDmr on the same slot). The virtreg
    ; gets spilt; the corresponding stack slots merged; and faulty code sequence
    ; eliminated if LLVM is broken.

    ; Make first 15 $xmm registers live
    $xmm0 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm1 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm2 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm3 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm4 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm5 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm6 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm7 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm8 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm9 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm10 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm11 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm12 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm13 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)
    $xmm14 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)

    ; First vreg load
    %1:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)

    ; First faulty sequence; %1 spilt
    %12:fr64 = MOVSDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india)
    %13:vr128 = COPY killed %12
    MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %13 :: (volatile store 16 into %ir.india)
    ; CHECK: renamable $xmm{{[0-9]+}} = MOVSDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india)
    ; CHECK-NEXT: MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (volatile store 16 into %ir.india)

    ; Store %1 to avoid it being optimised out, will result in a load-from-spill
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %1 :: (volatile dereferenceable store 16 into %ir.india)

    ; That code sequence a second time, to generate a second spill slot that
    ; will get coloured and merged.
    %2:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india)

    %22:fr64 = MOVSDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india)
    %23:vr128 = COPY killed %22
    MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %23 :: (volatile store 16 into %ir.india)

    ; CHECK: renamable $xmm{{[0-9]+}} = MOVSDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india)
    ; CHECK-NEXT: MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (volatile store 16 into %ir.india)

    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %2 :: (volatile dereferenceable store 16 into %ir.india)

    ; Stores of first 15 $xmm registers to keep them live across the middle of
    ; this bb.
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm0 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm1 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm2 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm3 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm4 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm5 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm6 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm7 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm8 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm9 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm10 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm11 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm12 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm13 :: (volatile dereferenceable store 16 into %ir.india)
    MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm14 :: (volatile dereferenceable store 16 into %ir.india)

    RET 0

...