mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
Revert [WinEH] Allocate space in funclets stack to save XMM CSRs
This reverts r367088 (git commit 9ad565f70ec5fd3531056d7c939302d4ea970c83) and the follow-up fix r368631 (git commit e9865b9b31bb2e6bc742dc6fca8f9f9517c3c43e). llvm-svn: 369457
This commit is contained in:
parent
defcf6aa12
commit
11972b9400
@ -935,10 +935,7 @@ bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
|
|||||||
; calls @llvm.eh.unwind.init
|
; calls @llvm.eh.unwind.init
|
||||||
[if needs FP]
|
[if needs FP]
|
||||||
[for all callee-saved XMM registers]
|
[for all callee-saved XMM registers]
|
||||||
[if funclet]
|
movaps %<xmm reg>, -MMM(%rbp)
|
||||||
movaps %<xmm reg>, -MMM(%rsp)
|
|
||||||
[else]
|
|
||||||
movaps %<xmm reg>, -MMM(%rbp)
|
|
||||||
[for all callee-saved XMM registers]
|
[for all callee-saved XMM registers]
|
||||||
.seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
|
.seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
|
||||||
; i.e. the offset relative to (%rbp - SEHFrameOffset)
|
; i.e. the offset relative to (%rbp - SEHFrameOffset)
|
||||||
@ -958,10 +955,7 @@ bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
|
|||||||
; Emit CFI info
|
; Emit CFI info
|
||||||
[if needs FP]
|
[if needs FP]
|
||||||
[for all callee-saved registers]
|
[for all callee-saved registers]
|
||||||
[if funclet]
|
.cfi_offset %<reg>, (offset from %rbp)
|
||||||
movaps -MMM(%rsp), %<xmm reg>
|
|
||||||
[else]
|
|
||||||
.cfi_offset %<reg>, (offset from %rbp)
|
|
||||||
[else]
|
[else]
|
||||||
.cfi_def_cfa_offset (offset from RETADDR)
|
.cfi_def_cfa_offset (offset from RETADDR)
|
||||||
[for all callee-saved registers]
|
[for all callee-saved registers]
|
||||||
@ -1176,16 +1170,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
|||||||
MFI.setOffsetAdjustment(-StackSize);
|
MFI.setOffsetAdjustment(-StackSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
// For EH funclets, only allocate enough space for outgoing calls and callee
|
// For EH funclets, only allocate enough space for outgoing calls. Save the
|
||||||
// saved XMM registers on Windows 64 bits. Save the NumBytes value that we
|
// NumBytes value that we would've used for the parent frame.
|
||||||
// would've used for the parent frame.
|
|
||||||
int XMMFrameSlotOrigin;
|
|
||||||
unsigned ParentFrameNumBytes = NumBytes;
|
unsigned ParentFrameNumBytes = NumBytes;
|
||||||
if (IsFunclet) {
|
if (IsFunclet)
|
||||||
NumBytes = getWinEHFuncletFrameSize(MF);
|
NumBytes = getWinEHFuncletFrameSize(MF);
|
||||||
if (IsWin64Prologue)
|
|
||||||
NumBytes += X86FI->getCalleeSavedXMMFrameInfo(XMMFrameSlotOrigin);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip the callee-saved push instructions.
|
// Skip the callee-saved push instructions.
|
||||||
bool PushedRegs = false;
|
bool PushedRegs = false;
|
||||||
@ -1393,33 +1382,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
|
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
|
||||||
auto FrameInstr = MBBI;
|
const MachineInstr &FrameInstr = *MBBI;
|
||||||
++MBBI;
|
++MBBI;
|
||||||
|
|
||||||
if (NeedsWinCFI) {
|
if (NeedsWinCFI) {
|
||||||
int FI;
|
int FI;
|
||||||
if (unsigned Reg = TII.isStoreToStackSlot(*FrameInstr, FI)) {
|
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
|
||||||
if (X86::FR64RegClass.contains(Reg)) {
|
if (X86::FR64RegClass.contains(Reg)) {
|
||||||
int Offset = 0;
|
unsigned IgnoredFrameReg;
|
||||||
|
int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
|
||||||
|
Offset += SEHFrameOffset;
|
||||||
|
|
||||||
HasWinCFI = true;
|
HasWinCFI = true;
|
||||||
if (IsFunclet) {
|
assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
|
||||||
assert(IsWin64Prologue && "Only valid on Windows 64bit");
|
|
||||||
unsigned Size = TRI->getSpillSize(X86::VR128RegClass);
|
|
||||||
unsigned Align = TRI->getSpillAlignment(X86::VR128RegClass);
|
|
||||||
Offset = (FI - XMMFrameSlotOrigin - 1) * Size +
|
|
||||||
alignDown(NumBytes, Align);
|
|
||||||
addRegOffset(BuildMI(MBB, MBBI, DL,
|
|
||||||
TII.get(getXMMAlignedLoadStoreOp(false))),
|
|
||||||
StackPtr, true, Offset)
|
|
||||||
.addReg(Reg)
|
|
||||||
.setMIFlag(MachineInstr::FrameSetup);
|
|
||||||
MBB.erase(FrameInstr);
|
|
||||||
} else {
|
|
||||||
assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
|
|
||||||
unsigned IgnoredFrameReg;
|
|
||||||
Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) +
|
|
||||||
SEHFrameOffset;
|
|
||||||
}
|
|
||||||
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
|
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
|
||||||
.addImm(Reg)
|
.addImm(Reg)
|
||||||
.addImm(Offset)
|
.addImm(Offset)
|
||||||
@ -1639,9 +1614,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
if (IsFunclet) {
|
if (IsFunclet) {
|
||||||
assert(HasFP && "EH funclets without FP not yet implemented");
|
assert(HasFP && "EH funclets without FP not yet implemented");
|
||||||
NumBytes = getWinEHFuncletFrameSize(MF);
|
NumBytes = getWinEHFuncletFrameSize(MF);
|
||||||
int Ignore;
|
|
||||||
if (IsWin64Prologue)
|
|
||||||
NumBytes += X86FI->getCalleeSavedXMMFrameInfo(Ignore);
|
|
||||||
} else if (HasFP) {
|
} else if (HasFP) {
|
||||||
// Calculate required stack adjustment.
|
// Calculate required stack adjustment.
|
||||||
uint64_t FrameSize = StackSize - SlotSize;
|
uint64_t FrameSize = StackSize - SlotSize;
|
||||||
@ -1969,8 +1941,6 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
|
|||||||
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
||||||
|
|
||||||
unsigned CalleeSavedFrameSize = 0;
|
unsigned CalleeSavedFrameSize = 0;
|
||||||
unsigned CalleeSavedXMMFrameSize = 0;
|
|
||||||
int CalleeSavedXMMSlotOrigin = 0;
|
|
||||||
int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
|
int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
|
||||||
|
|
||||||
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
|
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
|
||||||
@ -2034,44 +2004,9 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
|
|||||||
MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
|
MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
|
||||||
|
|
||||||
// Assign slots for XMMs.
|
// Assign slots for XMMs.
|
||||||
for (unsigned i = CSI.size(), Size = 0; i != 0; --i) {
|
|
||||||
unsigned Reg = CSI[i - 1].getReg();
|
|
||||||
// According to Microsoft "x64 software conventions", only XMM registers
|
|
||||||
// are nonvolatile except the GPR.
|
|
||||||
if (!X86::VR128RegClass.contains(Reg))
|
|
||||||
continue;
|
|
||||||
// Since all registers have the same size, we just initialize once.
|
|
||||||
if (Size == 0) {
|
|
||||||
unsigned Align = TRI->getSpillAlignment(X86::VR128RegClass);
|
|
||||||
// ensure alignment
|
|
||||||
int Remainder = SpillSlotOffset % Align;
|
|
||||||
if (Remainder < 0)
|
|
||||||
SpillSlotOffset -= Align + Remainder;
|
|
||||||
else
|
|
||||||
SpillSlotOffset -= Remainder;
|
|
||||||
MFI.ensureMaxAlignment(Align);
|
|
||||||
Size = TRI->getSpillSize(X86::VR128RegClass);
|
|
||||||
}
|
|
||||||
// spill into slot
|
|
||||||
SpillSlotOffset -= Size;
|
|
||||||
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
|
|
||||||
CSI[i - 1].setFrameIdx(SlotIndex);
|
|
||||||
// Since we allocate XMM slot consecutively in stack, we just need to
|
|
||||||
// record the first one for the funclet use.
|
|
||||||
if (CalleeSavedXMMFrameSize == 0) {
|
|
||||||
CalleeSavedXMMSlotOrigin = SlotIndex;
|
|
||||||
}
|
|
||||||
CalleeSavedXMMFrameSize += Size;
|
|
||||||
}
|
|
||||||
|
|
||||||
X86FI->setCalleeSavedXMMFrameInfo(CalleeSavedXMMFrameSize,
|
|
||||||
CalleeSavedXMMSlotOrigin);
|
|
||||||
|
|
||||||
// Assign slots for others.
|
|
||||||
for (unsigned i = CSI.size(); i != 0; --i) {
|
for (unsigned i = CSI.size(); i != 0; --i) {
|
||||||
unsigned Reg = CSI[i - 1].getReg();
|
unsigned Reg = CSI[i - 1].getReg();
|
||||||
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg) ||
|
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
|
||||||
X86::VR128RegClass.contains(Reg))
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// If this is k-register make sure we lookup via the largest legal type.
|
// If this is k-register make sure we lookup via the largest legal type.
|
||||||
@ -2083,11 +2018,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
|
|||||||
unsigned Size = TRI->getSpillSize(*RC);
|
unsigned Size = TRI->getSpillSize(*RC);
|
||||||
unsigned Align = TRI->getSpillAlignment(*RC);
|
unsigned Align = TRI->getSpillAlignment(*RC);
|
||||||
// ensure alignment
|
// ensure alignment
|
||||||
int Remainder = SpillSlotOffset % Align;
|
SpillSlotOffset -= std::abs(SpillSlotOffset) % Align;
|
||||||
if (Remainder < 0)
|
|
||||||
SpillSlotOffset -= Align + Remainder;
|
|
||||||
else
|
|
||||||
SpillSlotOffset -= Remainder;
|
|
||||||
// spill into slot
|
// spill into slot
|
||||||
SpillSlotOffset -= Size;
|
SpillSlotOffset -= Size;
|
||||||
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
|
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
|
||||||
@ -2226,32 +2157,19 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
|||||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||||
|
|
||||||
// Reload XMMs from stack frame.
|
// Reload XMMs from stack frame.
|
||||||
MachineFunction &MF = *MBB.getParent();
|
|
||||||
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
|
||||||
int XMMFrameSlotOrigin;
|
|
||||||
int SEHFrameOffset = X86FI->getCalleeSavedXMMFrameInfo(XMMFrameSlotOrigin) +
|
|
||||||
MF.getFrameInfo().getMaxCallFrameSize();
|
|
||||||
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
|
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
|
||||||
unsigned Reg = CSI[i].getReg();
|
unsigned Reg = CSI[i].getReg();
|
||||||
if (MBB.isEHFuncletEntry() && STI.is64Bit()) {
|
if (X86::GR64RegClass.contains(Reg) ||
|
||||||
if (X86::VR128RegClass.contains(Reg)) {
|
X86::GR32RegClass.contains(Reg))
|
||||||
int Offset = (CSI[i].getFrameIdx() - XMMFrameSlotOrigin - 1) * 16;
|
continue;
|
||||||
addRegOffset(BuildMI(MBB, MI, DL,
|
|
||||||
TII.get(getXMMAlignedLoadStoreOp(true)), Reg),
|
|
||||||
X86::RSP, true, SEHFrameOffset + Offset);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// If this is k-register make sure we lookup via the largest legal type.
|
// If this is k-register make sure we lookup via the largest legal type.
|
||||||
MVT VT = MVT::Other;
|
MVT VT = MVT::Other;
|
||||||
if (X86::VK16RegClass.contains(Reg))
|
if (X86::VK16RegClass.contains(Reg))
|
||||||
VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
|
VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
|
||||||
|
|
||||||
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
|
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
|
||||||
TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
|
TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// POP GPRs.
|
// POP GPRs.
|
||||||
@ -3191,19 +3109,14 @@ void X86FrameLowering::orderFrameObjects(
|
|||||||
std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
|
std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned
|
|
||||||
X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
|
unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
|
||||||
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
|
||||||
// RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
|
// RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
|
||||||
unsigned Offset = 16;
|
unsigned Offset = 16;
|
||||||
// RBP is immediately pushed.
|
// RBP is immediately pushed.
|
||||||
Offset += SlotSize;
|
Offset += SlotSize;
|
||||||
// All callee-saved registers are then pushed.
|
// All callee-saved registers are then pushed.
|
||||||
Offset += X86FI->getCalleeSavedFrameSize();
|
Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
|
||||||
// Funclets allocate space for however XMM registers are required.
|
|
||||||
int Ignore;
|
|
||||||
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
|
|
||||||
Offset += X86FI->getCalleeSavedXMMFrameInfo(Ignore);
|
|
||||||
// Every funclet allocates enough stack space for the largest outgoing call.
|
// Every funclet allocates enough stack space for the largest outgoing call.
|
||||||
Offset += getWinEHFuncletFrameSize(MF);
|
Offset += getWinEHFuncletFrameSize(MF);
|
||||||
return Offset;
|
return Offset;
|
||||||
@ -3265,8 +3178,3 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
|
|||||||
UnwindHelpFI)
|
UnwindHelpFI)
|
||||||
.addImm(-2);
|
.addImm(-2);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned X86FrameLowering::getXMMAlignedLoadStoreOp(const bool IsLoad) const {
|
|
||||||
return IsLoad ? (STI.hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm)
|
|
||||||
: (STI.hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr);
|
|
||||||
}
|
|
||||||
|
@ -217,10 +217,6 @@ private:
|
|||||||
void emitCatchRetReturnValue(MachineBasicBlock &MBB,
|
void emitCatchRetReturnValue(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator MBBI,
|
MachineBasicBlock::iterator MBBI,
|
||||||
MachineInstr *CatchRet) const;
|
MachineInstr *CatchRet) const;
|
||||||
|
|
||||||
/// Select the best opcode for the subtarget when funclet XMM CSRs
|
|
||||||
/// save/restore.
|
|
||||||
unsigned getXMMAlignedLoadStoreOp(const bool IsLoad) const;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
@ -40,14 +40,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
|||||||
/// stack frame in bytes.
|
/// stack frame in bytes.
|
||||||
unsigned CalleeSavedFrameSize = 0;
|
unsigned CalleeSavedFrameSize = 0;
|
||||||
|
|
||||||
/// CalleeSavedXMMFrameSize - Size of the callee-saved XMM register portion
|
|
||||||
/// of the stack frame in bytes.
|
|
||||||
unsigned CalleeSavedXMMFrameSize = 0;
|
|
||||||
|
|
||||||
/// CalleeSavedXMMFrameOrigin - Origin slot of the callee-saved XMM register
|
|
||||||
/// portion of the stack frame.
|
|
||||||
int CalleeSavedXMMFrameOrigin = 0;
|
|
||||||
|
|
||||||
/// BytesToPopOnReturn - Number of bytes function pops on return (in addition
|
/// BytesToPopOnReturn - Number of bytes function pops on return (in addition
|
||||||
/// to the space used by the return address).
|
/// to the space used by the return address).
|
||||||
/// Used on windows platform for stdcall & fastcall name decoration
|
/// Used on windows platform for stdcall & fastcall name decoration
|
||||||
@ -131,11 +123,6 @@ public:
|
|||||||
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
|
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
|
||||||
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
|
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
|
||||||
|
|
||||||
unsigned getCalleeSavedXMMFrameInfo(int &origin) const
|
|
||||||
{ origin = CalleeSavedXMMFrameOrigin; return CalleeSavedXMMFrameSize; }
|
|
||||||
void setCalleeSavedXMMFrameInfo(unsigned size, int origin)
|
|
||||||
{ CalleeSavedXMMFrameSize = size; CalleeSavedXMMFrameOrigin = origin; }
|
|
||||||
|
|
||||||
unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
|
unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
|
||||||
void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
|
void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
|
||||||
|
|
||||||
|
@ -184,7 +184,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||||||
; WIN64-KNL-LABEL: test_prolog_epilog:
|
; WIN64-KNL-LABEL: test_prolog_epilog:
|
||||||
; WIN64-KNL: # %bb.0:
|
; WIN64-KNL: # %bb.0:
|
||||||
; WIN64-KNL-NEXT: pushq %rbp
|
; WIN64-KNL-NEXT: pushq %rbp
|
||||||
; WIN64-KNL-NEXT: subq $1264, %rsp # imm = 0x4F0
|
; WIN64-KNL-NEXT: subq $1328, %rsp # imm = 0x530
|
||||||
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
||||||
; WIN64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
; WIN64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
||||||
; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
|
||||||
@ -232,14 +232,14 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||||||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
|
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
|
||||||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
|
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
|
||||||
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
|
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
|
||||||
; WIN64-KNL-NEXT: leaq 1136(%rbp), %rsp
|
; WIN64-KNL-NEXT: leaq 1200(%rbp), %rsp
|
||||||
; WIN64-KNL-NEXT: popq %rbp
|
; WIN64-KNL-NEXT: popq %rbp
|
||||||
; WIN64-KNL-NEXT: retq
|
; WIN64-KNL-NEXT: retq
|
||||||
;
|
;
|
||||||
; WIN64-SKX-LABEL: test_prolog_epilog:
|
; WIN64-SKX-LABEL: test_prolog_epilog:
|
||||||
; WIN64-SKX: # %bb.0:
|
; WIN64-SKX: # %bb.0:
|
||||||
; WIN64-SKX-NEXT: pushq %rbp
|
; WIN64-SKX-NEXT: pushq %rbp
|
||||||
; WIN64-SKX-NEXT: subq $1264, %rsp # imm = 0x4F0
|
; WIN64-SKX-NEXT: subq $1328, %rsp # imm = 0x530
|
||||||
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
|
||||||
; WIN64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
; WIN64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||||
; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||||
@ -287,7 +287,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||||||
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
|
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
|
||||||
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
|
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
|
||||||
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 8-byte Reload
|
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 8-byte Reload
|
||||||
; WIN64-SKX-NEXT: leaq 1136(%rbp), %rsp
|
; WIN64-SKX-NEXT: leaq 1200(%rbp), %rsp
|
||||||
; WIN64-SKX-NEXT: popq %rbp
|
; WIN64-SKX-NEXT: popq %rbp
|
||||||
; WIN64-SKX-NEXT: retq
|
; WIN64-SKX-NEXT: retq
|
||||||
;
|
;
|
||||||
@ -346,7 +346,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||||||
; X64-SKX: ## %bb.0:
|
; X64-SKX: ## %bb.0:
|
||||||
; X64-SKX-NEXT: pushq %rsi
|
; X64-SKX-NEXT: pushq %rsi
|
||||||
; X64-SKX-NEXT: pushq %rdi
|
; X64-SKX-NEXT: pushq %rdi
|
||||||
; X64-SKX-NEXT: subq $1064, %rsp ## imm = 0x428
|
; X64-SKX-NEXT: subq $1192, %rsp ## imm = 0x4A8
|
||||||
; X64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
; X64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
||||||
; X64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
; X64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
||||||
; X64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
; X64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
|
||||||
@ -388,7 +388,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
|
|||||||
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
|
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
|
||||||
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
|
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
|
||||||
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
|
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
|
||||||
; X64-SKX-NEXT: addq $1064, %rsp ## imm = 0x428
|
; X64-SKX-NEXT: addq $1192, %rsp ## imm = 0x4A8
|
||||||
; X64-SKX-NEXT: popq %rdi
|
; X64-SKX-NEXT: popq %rdi
|
||||||
; X64-SKX-NEXT: popq %rsi
|
; X64-SKX-NEXT: popq %rsi
|
||||||
; X64-SKX-NEXT: retq
|
; X64-SKX-NEXT: retq
|
||||||
|
@ -51,18 +51,3 @@ catch:
|
|||||||
; CHECK: popq %rbp
|
; CHECK: popq %rbp
|
||||||
; CHECK: retq
|
; CHECK: retq
|
||||||
; CHECK: .seh_handlerdata
|
; CHECK: .seh_handlerdata
|
||||||
; CHECK: # %catch
|
|
||||||
; CHECK: movq %rdx, 16(%rsp)
|
|
||||||
; CHECK: pushq %rbp
|
|
||||||
; CHECK: .seh_pushreg 5
|
|
||||||
; CHECK: subq $48, %rsp
|
|
||||||
; CHECK: .seh_stackalloc 48
|
|
||||||
; CHECK: leaq 64(%rdx), %rbp
|
|
||||||
; CHECK: movapd %xmm6, 32(%rsp)
|
|
||||||
; CHECK: .seh_savexmm 6, 32
|
|
||||||
; CHECK: .seh_endprologue
|
|
||||||
; CHECK: movapd 32(%rsp), %xmm6
|
|
||||||
; CHECK: leaq .LBB0_1(%rip), %rax
|
|
||||||
; CHECK: addq $48, %rsp
|
|
||||||
; CHECK: popq %rbp
|
|
||||||
; CHECK: retq # CATCHRET
|
|
||||||
|
@ -1,79 +0,0 @@
|
|||||||
; RUN: llc -mtriple=x86_64-pc-windows-msvc -mattr=+avx < %s | FileCheck %s
|
|
||||||
|
|
||||||
; void foo(void)
|
|
||||||
; {
|
|
||||||
; __asm("nop" ::: "bx", "cx", "xmm5", "xmm6", "ymm7");
|
|
||||||
; try {
|
|
||||||
; throw;
|
|
||||||
; }
|
|
||||||
; catch (int x) {
|
|
||||||
; }
|
|
||||||
; }
|
|
||||||
|
|
||||||
%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
|
|
||||||
%eh.ThrowInfo = type { i32, i8*, i8*, i8* }
|
|
||||||
|
|
||||||
$"??_R0H@8" = comdat any
|
|
||||||
|
|
||||||
@"??_7type_info@@6B@" = external constant i8*
|
|
||||||
@"??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
|
|
||||||
|
|
||||||
declare dso_local i32 @__CxxFrameHandler3(...)
|
|
||||||
declare dso_local x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
|
|
||||||
|
|
||||||
define dso_local void @"?foo@@YAXXZ"() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
|
|
||||||
entry:
|
|
||||||
%x = alloca i32, align 4
|
|
||||||
call void asm sideeffect "nop", "~{bx},~{cx},~{xmm5},~{xmm6},~{ymm7}"()
|
|
||||||
invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null)
|
|
||||||
to label %unreachable unwind label %catch.dispatch
|
|
||||||
|
|
||||||
catch.dispatch: ; preds = %entry
|
|
||||||
%0 = catchswitch within none [label %catch] unwind to caller
|
|
||||||
|
|
||||||
catch: ; preds = %catch.dispatch
|
|
||||||
%1 = catchpad within %0 [%rtti.TypeDescriptor2* @"??_R0H@8", i32 0, i32* %x]
|
|
||||||
catchret from %1 to label %catchret.dest
|
|
||||||
|
|
||||||
catchret.dest: ; preds = %catch
|
|
||||||
br label %try.cont
|
|
||||||
|
|
||||||
try.cont: ; preds = %catchret.dest
|
|
||||||
ret void
|
|
||||||
|
|
||||||
unreachable: ; preds = %entry
|
|
||||||
unreachable
|
|
||||||
}
|
|
||||||
|
|
||||||
; CHECK: # %catch
|
|
||||||
; CHECK: movq %rdx, 16(%rsp)
|
|
||||||
; CHECK: pushq %rbp
|
|
||||||
; CHECK: .seh_pushreg 5
|
|
||||||
; CHECK: pushq %rbx
|
|
||||||
; CHECK: .seh_pushreg 3
|
|
||||||
; CHECK: subq $72, %rsp
|
|
||||||
; CHECK: .seh_stackalloc 72
|
|
||||||
; CHECK: leaq 80(%rdx), %rbp
|
|
||||||
; CHECK: vmovaps %xmm7, 48(%rsp)
|
|
||||||
; CHECK: .seh_savexmm 7, 48
|
|
||||||
; CHECK: vmovaps %xmm6, 32(%rsp)
|
|
||||||
; CHECK: .seh_savexmm 6, 32
|
|
||||||
; CHECK: .seh_endprologue
|
|
||||||
; CHECK: vmovaps 32(%rsp), %xmm6
|
|
||||||
; CHECK: vmovaps 48(%rsp), %xmm7
|
|
||||||
; CHECK: leaq .LBB0_3(%rip), %rax
|
|
||||||
; CHECK: addq $72, %rsp
|
|
||||||
; CHECK: popq %rbx
|
|
||||||
; CHECK: popq %rbp
|
|
||||||
; CHECK: retq # CATCHRET
|
|
||||||
|
|
||||||
; CHECK-LABEL: "$handlerMap$0$?foo@@YAXXZ":
|
|
||||||
; CHECK-NEXT: .long 0 # Adjectives
|
|
||||||
; CHECK-NEXT: .long "??_R0H@8"@IMGREL # Type
|
|
||||||
; CHECK-NEXT: .long 44 # CatchObjOffset
|
|
||||||
; CHECK-NEXT: .long "?catch${{.*}}??foo@@YAXXZ@4HA"@IMGREL # Handler
|
|
||||||
; Sum of:
|
|
||||||
; 16 RDX store offset
|
|
||||||
; 16 two pushes
|
|
||||||
; 72 stack alloc
|
|
||||||
; CHECK-NEXT: .long 104 # ParentFrameOffset
|
|
@ -294,7 +294,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||||||
; CHECK64-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
; CHECK64-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Spill
|
||||||
; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
|
; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x08,0x00,0x00]
|
||||||
; CHECK64-SKX-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
; CHECK64-SKX-NEXT: vmovups %zmm31, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
||||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x7c,0x24,0x1f]
|
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x07,0x00,0x00]
|
||||||
; CHECK64-SKX-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
; CHECK64-SKX-NEXT: vmovups %zmm30, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
||||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
|
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x11,0x74,0x24,0x1e]
|
||||||
; CHECK64-SKX-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
; CHECK64-SKX-NEXT: vmovups %zmm29, {{[0-9]+}}(%rsp) ## 64-byte Spill
|
||||||
@ -398,7 +398,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||||||
; CHECK64-SKX-NEXT: .cfi_offset %xmm28, -448
|
; CHECK64-SKX-NEXT: .cfi_offset %xmm28, -448
|
||||||
; CHECK64-SKX-NEXT: .cfi_offset %xmm29, -384
|
; CHECK64-SKX-NEXT: .cfi_offset %xmm29, -384
|
||||||
; CHECK64-SKX-NEXT: .cfi_offset %xmm30, -320
|
; CHECK64-SKX-NEXT: .cfi_offset %xmm30, -320
|
||||||
; CHECK64-SKX-NEXT: .cfi_offset %xmm31, -256
|
; CHECK64-SKX-NEXT: .cfi_offset %xmm31, -224
|
||||||
; CHECK64-SKX-NEXT: .cfi_offset %k0, -144
|
; CHECK64-SKX-NEXT: .cfi_offset %k0, -144
|
||||||
; CHECK64-SKX-NEXT: .cfi_offset %k1, -136
|
; CHECK64-SKX-NEXT: .cfi_offset %k1, -136
|
||||||
; CHECK64-SKX-NEXT: .cfi_offset %k2, -128
|
; CHECK64-SKX-NEXT: .cfi_offset %k2, -128
|
||||||
@ -474,7 +474,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||||||
; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
|
; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm30 ## 64-byte Reload
|
||||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
|
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x74,0x24,0x1e]
|
||||||
; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
|
; CHECK64-SKX-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm31 ## 64-byte Reload
|
||||||
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0x7c,0x24,0x1f]
|
; CHECK64-SKX-NEXT: ## encoding: [0x62,0x61,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x07,0x00,0x00]
|
||||||
; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
|
; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Reload
|
||||||
; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
|
; CHECK64-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x08,0x00,0x00]
|
||||||
; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
|
; CHECK64-SKX-NEXT: kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Reload
|
||||||
@ -635,7 +635,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||||||
; CHECK32-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill
|
; CHECK32-SKX-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ## 8-byte Spill
|
||||||
; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
|
; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x91,0x84,0x24,0x30,0x02,0x00,0x00]
|
||||||
; CHECK32-SKX-NEXT: vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill
|
; CHECK32-SKX-NEXT: vmovups %zmm7, {{[0-9]+}}(%esp) ## 64-byte Spill
|
||||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x7c,0x24,0x07]
|
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0xbc,0x24,0xe0,0x01,0x00,0x00]
|
||||||
; CHECK32-SKX-NEXT: vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill
|
; CHECK32-SKX-NEXT: vmovups %zmm6, {{[0-9]+}}(%esp) ## 64-byte Spill
|
||||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
|
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x74,0x24,0x06]
|
||||||
; CHECK32-SKX-NEXT: vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill
|
; CHECK32-SKX-NEXT: vmovups %zmm5, {{[0-9]+}}(%esp) ## 64-byte Spill
|
||||||
@ -661,7 +661,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||||||
; CHECK32-SKX-NEXT: .cfi_offset %xmm4, -384
|
; CHECK32-SKX-NEXT: .cfi_offset %xmm4, -384
|
||||||
; CHECK32-SKX-NEXT: .cfi_offset %xmm5, -320
|
; CHECK32-SKX-NEXT: .cfi_offset %xmm5, -320
|
||||||
; CHECK32-SKX-NEXT: .cfi_offset %xmm6, -256
|
; CHECK32-SKX-NEXT: .cfi_offset %xmm6, -256
|
||||||
; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -192
|
; CHECK32-SKX-NEXT: .cfi_offset %xmm7, -160
|
||||||
; CHECK32-SKX-NEXT: .cfi_offset %k0, -80
|
; CHECK32-SKX-NEXT: .cfi_offset %k0, -80
|
||||||
; CHECK32-SKX-NEXT: .cfi_offset %k1, -72
|
; CHECK32-SKX-NEXT: .cfi_offset %k1, -72
|
||||||
; CHECK32-SKX-NEXT: .cfi_offset %k2, -64
|
; CHECK32-SKX-NEXT: .cfi_offset %k2, -64
|
||||||
@ -689,7 +689,7 @@ define x86_intrcc void @foo(i8* %frame) {
|
|||||||
; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload
|
; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm6 ## 64-byte Reload
|
||||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
|
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x74,0x24,0x06]
|
||||||
; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload
|
; CHECK32-SKX-NEXT: vmovups {{[0-9]+}}(%esp), %zmm7 ## 64-byte Reload
|
||||||
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x7c,0x24,0x07]
|
; CHECK32-SKX-NEXT: ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0xbc,0x24,0xe0,0x01,0x00,0x00]
|
||||||
; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload
|
; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k0 ## 8-byte Reload
|
||||||
; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
|
; CHECK32-SKX-NEXT: ## encoding: [0xc4,0xe1,0xf8,0x90,0x84,0x24,0x30,0x02,0x00,0x00]
|
||||||
; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload
|
; CHECK32-SKX-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ## 8-byte Reload
|
||||||
|
Loading…
x
Reference in New Issue
Block a user