1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[X86] Correct dwarf unwind information in function epilogue

CFI instructions that set appropriate cfa offset and cfa register are now
inserted in emitEpilogue() in X86FrameLowering.

Majority of the changes in this patch:

1. Ensure that CFI instructions do not affect code generation.
2. Enable maintaining correct information about cfa offset and cfa register
in a function when basic blocks are reordered, merged, split, duplicated.

These changes are target independent and described below.

Changed CFI instructions so that they:

1. are duplicable
2. are not counted as instructions when tail duplicating or tail merging
3. can be compared as equal

Add information to each MachineBasicBlock about cfa offset and cfa register
that are valid at its entry and exit (incoming and outgoing CFI info). Add
support for updating this information when basic blocks are merged, split,
duplicated, created. Add a verification pass (CFIInfoVerifier) that checks
that outgoing cfa offset and register of predecessor blocks match incoming
values of their successors.

Incoming and outgoing CFI information is used by a late pass
(CFIInstrInserter) that corrects CFA calculation rule for a basic block if
needed. That means that additional CFI instructions get inserted at basic
block beginning to correct the rule for calculating CFA. Having CFI
instructions in function epilogue can cause incorrect CFA calculation rule
for some basic blocks. This can happen if, due to basic block reordering,
or the existence of multiple epilogue blocks, some of the blocks have wrong
cfa offset and register values set by the epilogue block above them.

Patch by Violeta Vukobrat.

Differential Revision: https://reviews.llvm.org/D18046

llvm-svn: 306529
This commit is contained in:
Petar Jovanovic 2017-06-28 10:21:17 +00:00
parent a469f773c7
commit 0199002e6e
74 changed files with 2048 additions and 468 deletions

View File

@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DebugLoc.h"
#include "llvm/MC/LaneBitmask.h" #include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/BranchProbability.h" #include "llvm/Support/BranchProbability.h"
#include <cassert> #include <cassert>
@ -757,6 +758,52 @@ private:
/// unless you know what you're doing, because it doesn't update Pred's /// unless you know what you're doing, because it doesn't update Pred's
/// successors list. Use Pred->removeSuccessor instead. /// successors list. Use Pred->removeSuccessor instead.
void removePredecessor(MachineBasicBlock *Pred); void removePredecessor(MachineBasicBlock *Pred);
// Value of cfa offset valid at basic block entry.
int IncomingCFAOffset = -1;
// Value of cfa offset valid at basic block exit.
int OutgoingCFAOffset = -1;
// Value of cfa register valid at basic block entry.
unsigned IncomingCFARegister = 0;
// Value of cfa register valid at basic block exit.
unsigned OutgoingCFARegister = 0;
// If a block contains a def_cfa_offset or def_cfa directive.
bool DefOffset = false;
// If a block contains a def_cfa_register or def_cfa directive.
bool DefRegister = false;
public:
int getIncomingCFAOffset() { return IncomingCFAOffset; }
void setIncomingCFAOffset(int Offset) { IncomingCFAOffset = Offset; }
int getOutgoingCFAOffset() { return OutgoingCFAOffset; }
void setOutgoingCFAOffset(int Offset) { OutgoingCFAOffset = Offset; }
unsigned getIncomingCFARegister() { return IncomingCFARegister; }
void setIncomingCFARegister(unsigned Register) {
IncomingCFARegister = Register;
}
unsigned getOutgoingCFARegister() { return OutgoingCFARegister; }
void setOutgoingCFARegister(unsigned Register) {
OutgoingCFARegister = Register;
}
bool hasDefOffset() { return DefOffset; }
bool hasDefRegister() { return DefRegister; }
void setDefOffset(bool SetsOffset) { DefOffset = SetsOffset; }
void setDefRegister(bool SetsRegister) { DefRegister = SetsRegister; }
// Update the outgoing cfa offset and register for this block based on the CFI
// instruction inserted at Pos.
void updateCFIInfo(MachineBasicBlock::iterator Pos);
// Update the cfa offset and register values for all successors of this block.
void updateCFIInfoSucc();
// Recalculate outgoing cfa offset and register. Use existing incoming offset
// and register values if UseExistingIncoming is set to true. If it is false,
// use new values passed as arguments.
void recalculateCFIInfo(bool UseExistingIncoming, int NewIncomingOffset = -1,
unsigned NewIncomingRegister = 0);
// Update outgoing cfa offset and register of the block after it is merged
// with MBB.
void mergeCFIInfo(MachineBasicBlock *MBB);
}; };
raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB); raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);

View File

@ -789,6 +789,7 @@ public:
&& getOperand(1).isImm(); && getOperand(1).isImm();
} }
/// Directives are metadata-only pseudo-instructions (debug values and CFI
/// instructions); code in this change skips them when measuring or comparing
/// "real" instructions (e.g. tail-merge length computation).
bool isDirective() const { return isDebugValue() || isCFIInstruction(); }
bool isPHI() const { return getOpcode() == TargetOpcode::PHI; } bool isPHI() const { return getOpcode() == TargetOpcode::PHI; }
bool isKill() const { return getOpcode() == TargetOpcode::KILL; } bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }

View File

@ -420,6 +420,14 @@ namespace llvm {
/// shuffles. /// shuffles.
FunctionPass *createExpandReductionsPass(); FunctionPass *createExpandReductionsPass();
/// This pass verifies that outgoing cfa offset and register of predecessor
/// blocks match incoming cfa offset and register of their successors.
FunctionPass *createCFIInfoVerifier();
/// This pass inserts required CFI instruction at basic block beginning to
/// correct the CFA calculation rule for that block if necessary.
FunctionPass *createCFIInstrInserter();
} // End llvm namespace } // End llvm namespace
#endif #endif

View File

@ -87,6 +87,8 @@ void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&);
void initializeCFGPrinterLegacyPassPass(PassRegistry&); void initializeCFGPrinterLegacyPassPass(PassRegistry&);
void initializeCFGSimplifyPassPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&);
void initializeCFGViewerLegacyPassPass(PassRegistry&); void initializeCFGViewerLegacyPassPass(PassRegistry&);
void initializeCFIInfoVerifierPass(PassRegistry&);
void initializeCFIInstrInserterPass(PassRegistry&);
void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
void initializeCFLSteensAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
void initializeCallGraphDOTPrinterPass(PassRegistry&); void initializeCallGraphDOTPrinterPass(PassRegistry&);

View File

@ -824,7 +824,7 @@ def CFI_INSTRUCTION : Instruction {
let InOperandList = (ins i32imm:$id); let InOperandList = (ins i32imm:$id);
let AsmString = ""; let AsmString = "";
let hasCtrlDep = 1; let hasCtrlDep = 1;
let isNotDuplicable = 1; let isNotDuplicable = 0;
} }
def EH_LABEL : Instruction { def EH_LABEL : Instruction {
let OutOperandList = (outs); let OutOperandList = (outs);

View File

@ -339,6 +339,19 @@ public:
return false; return false;
return true; return true;
} }
// Set initial incoming and outgoing cfa offset and register values for basic
// blocks. Initial values are the ones valid at the beginning of the function
// (before any stack operations). Incoming and outgoing cfa offset and
// register values are used to keep track of offset and register that are
// valid at basic block entry and exit. This information is used by a late
// pass that corrects the CFA calculation rule for a basic block if needed.
// Having CFI instructions in function epilogue can cause incorrect CFA
// calculation rule for some basic blocks. This can happen if, due to basic
// block reordering, or the existence of multiple epilogue blocks, some of the
// blocks have wrong cfa offset and register values set by the epilogue block
// above them.
// The default implementation is a no-op; targets that track CFI info
// override it (this commit wires it up for X86).
virtual void initializeCFIInfo(MachineFunction & MF) const {}
}; };
} // End llvm namespace } // End llvm namespace

View File

@ -304,9 +304,9 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
while (I1 != MBB1->begin() && I2 != MBB2->begin()) { while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
--I1; --I2; --I1; --I2;
// Skip debugging pseudos; necessary to avoid changing the code. // Skip debugging pseudos; necessary to avoid changing the code.
while (I1->isDebugValue()) { while (I1->isDirective()) {
if (I1==MBB1->begin()) { if (I1==MBB1->begin()) {
while (I2->isDebugValue()) { while (I2->isDirective()) {
if (I2==MBB2->begin()) if (I2==MBB2->begin())
// I1==DBG at begin; I2==DBG at begin // I1==DBG at begin; I2==DBG at begin
return TailLen; return TailLen;
@ -319,7 +319,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
--I1; --I1;
} }
// I1==first (untested) non-DBG preceding known match // I1==first (untested) non-DBG preceding known match
while (I2->isDebugValue()) { while (I2->isDirective()) {
if (I2==MBB2->begin()) { if (I2==MBB2->begin()) {
++I1; ++I1;
// I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
@ -362,6 +362,35 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
} }
++I1; ++I1;
} }
// Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
// I1 and I2 are non-identical when compared and then one or both of them ends
// up pointing to a CFI instruction after being incremented. For example:
/*
BB1:
...
INSTRUCTION_A
ADD32ri8 <- last common instruction
...
BB2:
...
INSTRUCTION_B
CFI_INSTRUCTION
ADD32ri8 <- last common instruction
...
*/
// When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
// incrementing the iterators, I1 will point to ADD, however I2 will point to
// the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
// wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
// instruction.
while (I1 != MBB1->end() && I1->isCFIInstruction()) {
++I1;
}
while (I2 != MBB2->end() && I2->isCFIInstruction()) {
++I2;
}
return TailLen; return TailLen;
} }
@ -417,6 +446,14 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
FuncletMembership[NewMBB] = n; FuncletMembership[NewMBB] = n;
} }
// Recalculate CFI info for CurMBB. Use existing incoming cfa offset and
// register.
CurMBB.recalculateCFIInfo(true);
// Recalculate CFI info for NewMBB. Use CurMBB's outgoing cfa offset and
// register as NewMBB's incoming.
NewMBB->recalculateCFIInfo(false, CurMBB.getOutgoingCFAOffset(),
CurMBB.getOutgoingCFARegister());
return NewMBB; return NewMBB;
} }
@ -426,7 +463,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) { MachineBasicBlock::iterator E) {
unsigned Time = 0; unsigned Time = 0;
for (; I != E; ++I) { for (; I != E; ++I) {
if (I->isDebugValue()) if (I->isDirective())
continue; continue;
if (I->isCall()) if (I->isCall())
Time += 10; Time += 10;
@ -780,7 +817,7 @@ void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) {
} }
for (auto &MI : *MBB) { for (auto &MI : *MBB) {
if (MI.isDebugValue()) if (MI.isDirective())
continue; continue;
DebugLoc DL = MI.getDebugLoc(); DebugLoc DL = MI.getDebugLoc();
for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
@ -790,7 +827,7 @@ void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) {
auto &Pos = NextCommonInsts[i]; auto &Pos = NextCommonInsts[i];
assert(Pos != SameTails[i].getBlock()->end() && assert(Pos != SameTails[i].getBlock()->end() &&
"Reached BB end within common tail"); "Reached BB end within common tail");
while (Pos->isDebugValue()) { while (Pos->isDirective()) {
++Pos; ++Pos;
assert(Pos != SameTails[i].getBlock()->end() && assert(Pos != SameTails[i].getBlock()->end() &&
"Reached BB end within common tail"); "Reached BB end within common tail");
@ -823,12 +860,12 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
assert(MBBI != MBBIE && "Reached BB end within common tail length!"); assert(MBBI != MBBIE && "Reached BB end within common tail length!");
(void)MBBIE; (void)MBBIE;
if (MBBI->isDebugValue()) { if (MBBI->isDirective()) {
++MBBI; ++MBBI;
continue; continue;
} }
while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) while ((MBBICommon != MBBIECommon) && MBBICommon->isDirective())
++MBBICommon; ++MBBICommon;
assert(MBBICommon != MBBIECommon && assert(MBBICommon != MBBIECommon &&
@ -971,6 +1008,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
mergeOperations(SameTails[i].getTailStartPos(), *MBB); mergeOperations(SameTails[i].getTailStartPos(), *MBB);
// Hack the end off BB i, making it jump to BB commonTailIndex instead. // Hack the end off BB i, making it jump to BB commonTailIndex instead.
ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
// Recalculate CFI info for BB. Use existing incoming cfa offset and
// register.
SameTails[i].getBlock()->recalculateCFIInfo(true);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist. // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
MergePotentials.erase(SameTails[i].getMPIter()); MergePotentials.erase(SameTails[i].getMPIter());
} }
@ -1381,6 +1423,10 @@ ReoptimizeBlock:
assert(PrevBB.succ_empty()); assert(PrevBB.succ_empty());
PrevBB.transferSuccessors(MBB); PrevBB.transferSuccessors(MBB);
MadeChange = true; MadeChange = true;
// Update CFI info for PrevBB.
PrevBB.mergeCFIInfo(MBB);
return MadeChange; return MadeChange;
} }

View File

@ -0,0 +1,123 @@
//===----------- CFIInfoVerifier.cpp - CFI Information Verifier -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass verifies incoming and outgoing CFI information of basic blocks. CFI
// information is information about offset and register set by CFI directives,
// valid at the start and end of a basic block. This pass checks that outgoing
// information of predecessors matches incoming information of their successors.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
namespace {
class CFIInfoVerifier : public MachineFunctionPass {
public:
  static char ID;

  CFIInfoVerifier() : MachineFunctionPass(ID) {
    initializeCFIInfoVerifierPass(*PassRegistry::getPassRegistry());
  }

  // Pure checker: modifies nothing, preserves every analysis.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Dwarf CFI only matters when the function needs debug info or an unwind
    // table, and never on Darwin/Windows (which do not use this scheme).
    const auto &Triple = MF.getTarget().getTargetTriple();
    bool UsesDwarfCFI = !Triple.isOSDarwin() && !Triple.isOSWindows() &&
                        (MF.getMMI().hasDebugInfo() ||
                         MF.getFunction()->needsUnwindTableEntry());
    if (UsesDwarfCFI)
      verify(MF);
    // Verification never changes the function.
    return false;
  }

private:
  // Check that each block's incoming CFI state matches the outgoing state of
  // all of its predecessors, and that its outgoing state matches the incoming
  // state of all of its successors.
  void verify(MachineFunction &MF);
  void report(const char *msg, MachineBasicBlock &MBB);
};
}
char CFIInfoVerifier::ID = 0;
INITIALIZE_PASS(CFIInfoVerifier, "cfiinfoverifier",
"Verify that corresponding in/out CFI info matches", false,
false)
FunctionPass *llvm::createCFIInfoVerifier() { return new CFIInfoVerifier(); }
void CFIInfoVerifier::verify(MachineFunction &MF) {
  for (MachineBasicBlock &Block : MF) {
    // Every predecessor's outgoing state must agree with Block's incoming
    // state.
    for (MachineBasicBlock *Pred : Block.predecessors()) {
      if (Pred->getOutgoingCFAOffset() != Block.getIncomingCFAOffset()) {
        report("The outgoing offset of a predecessor is inconsistent.", Block);
        errs() << "Predecessor BB#" << Pred->getNumber()
               << " has outgoing offset (" << Pred->getOutgoingCFAOffset()
               << "), while BB#" << Block.getNumber()
               << " has incoming offset (" << Block.getIncomingCFAOffset()
               << ").\n";
      }
      if (Pred->getOutgoingCFARegister() != Block.getIncomingCFARegister()) {
        report("The outgoing register of a predecessor is inconsistent.",
               Block);
        errs() << "Predecessor BB#" << Pred->getNumber()
               << " has outgoing register (" << Pred->getOutgoingCFARegister()
               << "), while BB#" << Block.getNumber()
               << " has incoming register (" << Block.getIncomingCFARegister()
               << ").\n";
      }
    }
    // Every successor's incoming state must agree with Block's outgoing
    // state.
    for (MachineBasicBlock *Succ : Block.successors()) {
      if (Succ->getIncomingCFAOffset() != Block.getOutgoingCFAOffset()) {
        report("The incoming offset of a successor is inconsistent.", Block);
        errs() << "Successor BB#" << Succ->getNumber()
               << " has incoming offset (" << Succ->getIncomingCFAOffset()
               << "), while BB#" << Block.getNumber()
               << " has outgoing offset (" << Block.getOutgoingCFAOffset()
               << ").\n";
      }
      if (Succ->getIncomingCFARegister() != Block.getOutgoingCFARegister()) {
        report("The incoming register of a successor is inconsistent.",
               Block);
        errs() << "Successor BB#" << Succ->getNumber()
               << " has incoming register (" << Succ->getIncomingCFARegister()
               << "), while BB#" << Block.getNumber()
               << " has outgoing register (" << Block.getOutgoingCFARegister()
               << ").\n";
      }
    }
  }
}
// Print a verifier error header for MBB: the message, the enclosing
// function, and the block's number/name/address. Detailed values are printed
// by the caller.
// Note: the former 'assert(&MBB)' was removed — MBB is a reference, so its
// address can never be null and the assert was vacuously true (compilers
// flag it under -Waddress).
void CFIInfoVerifier::report(const char *msg, MachineBasicBlock &MBB) {
  errs() << '\n'
         << "*** " << msg << " ***\n"
         << "- function: " << MBB.getParent()->getName() << "\n"
         << "- basic block: BB#" << MBB.getNumber() << ' ' << MBB.getName()
         << " (" << (const void *)&MBB << ')' << '\n';
}

View File

@ -0,0 +1,124 @@
//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Insert CFI instructions at the beginnings of basic blocks if needed. CFI
// instructions are inserted if basic blocks have incorrect offset or register
// set by previous blocks.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
namespace {
class CFIInstrInserter : public MachineFunctionPass {
public:
CFIInstrInserter() : MachineFunctionPass(ID) {
initializeCFIInstrInserterPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
private:
StringRef getPassName() const override { return "CFI Instruction Inserter"; }
// Check if incoming CFI information of a basic block matches outgoing CFI
// information of the previous block. If it doesn't, insert CFI instruction at
// the beginning of the block that corrects the CFA calculation rule for that
// block.
void CorrectCFA(MachineFunction &MF);
// Return the cfa offset value that should be set at the beginning of MBB if
// needed. The negated value is needed when creating CFI instructions that set
// absolute offset.
int getCorrectCFAOffset(MachineBasicBlock &MBB) {
return -MBB.getIncomingCFAOffset();
}
// Were any CFI instructions inserted
bool InsertedCFIInstr = false;
};
}
char CFIInstrInserter::ID = 0;
INITIALIZE_PASS(CFIInstrInserter, "cfiinstrinserter",
"Check CFI info and insert CFI instructions if needed", false,
false)
FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); }
bool CFIInstrInserter::runOnMachineFunction(MachineFunction &MF) {
  // Dwarf CFI is only emitted when debug info or an unwind table entry is
  // required, and only on targets that use Dwarf unwinding (not Darwin or
  // Windows).
  const auto &Triple = MF.getTarget().getTargetTriple();
  bool UsesDwarfCFI = !Triple.isOSDarwin() && !Triple.isOSWindows() &&
                      (MF.getMMI().hasDebugInfo() ||
                       MF.getFunction()->needsUnwindTableEntry());
  if (!UsesDwarfCFI)
    return false;
  // Fix the CFA calculation rule for every block that needs it.
  CorrectCFA(MF);
  return InsertedCFIInstr;
}
void CFIInstrInserter::CorrectCFA(MachineFunction &MF) {
  MachineBasicBlock &FirstMBB = MF.front();
  MachineBasicBlock *PrevMBB = &FirstMBB;
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  InsertedCFIInstr = false;

  for (auto &MBB : MF) {
    // The entry block starts from the function's initial CFI state and never
    // needs a correction.
    if (MBB.getNumber() == FirstMBB.getNumber())
      continue;

    auto MBBI = MBB.begin();
    DebugLoc DL = MBB.findDebugLoc(MBBI);

    bool OffsetMatches =
        PrevMBB->getOutgoingCFAOffset() == MBB.getIncomingCFAOffset();
    bool RegisterMatches =
        PrevMBB->getOutgoingCFARegister() == MBB.getIncomingCFARegister();

    if (!OffsetMatches && !RegisterMatches) {
      // Both offset and register are wrong: a single def_cfa sets both.
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, MBB.getIncomingCFARegister(), getCorrectCFAOffset(MBB)));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
      InsertedCFIInstr = true;
    } else if (!OffsetMatches) {
      // Only the offset is wrong: def_cfa_offset suffices.
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(
              nullptr, getCorrectCFAOffset(MBB)));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
      InsertedCFIInstr = true;
    } else if (!RegisterMatches) {
      // Only the register is wrong: def_cfa_register suffices.
      unsigned CFIIndex =
          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
              nullptr, MBB.getIncomingCFARegister()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
      InsertedCFIInstr = true;
    }

    PrevMBB = &MBB;
  }
}

View File

@ -10,6 +10,8 @@ add_llvm_library(LLVMCodeGen
BuiltinGCs.cpp BuiltinGCs.cpp
CalcSpillWeights.cpp CalcSpillWeights.cpp
CallingConvLower.cpp CallingConvLower.cpp
CFIInfoVerifier.cpp
CFIInstrInserter.cpp
CodeGen.cpp CodeGen.cpp
CodeGenPrepare.cpp CodeGenPrepare.cpp
CountingFunctionInserter.cpp CountingFunctionInserter.cpp

View File

@ -24,6 +24,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchCoalescingPass(Registry); initializeBranchCoalescingPass(Registry);
initializeBranchFolderPassPass(Registry); initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry); initializeBranchRelaxationPass(Registry);
initializeCFIInfoVerifierPass(Registry);
initializeCFIInstrInserterPass(Registry);
initializeCodeGenPreparePass(Registry); initializeCodeGenPreparePass(Registry);
initializeCountingFunctionInserterPass(Registry); initializeCountingFunctionInserterPass(Registry);
initializeDeadMachineInstructionElimPass(Registry); initializeDeadMachineInstructionElimPass(Registry);

View File

@ -35,6 +35,8 @@
#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm> #include <algorithm>
#include <queue>
#include <set>
using namespace llvm; using namespace llvm;
#define DEBUG_TYPE "codegen" #define DEBUG_TYPE "codegen"
@ -1343,3 +1345,228 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
"Liveness information is accurate"); "Liveness information is accurate");
return LiveIns.begin(); return LiveIns.begin();
} }
void MachineBasicBlock::updateCFIInfo(MachineBasicBlock::iterator Pos) {
  // Recompute this block's outgoing cfa offset/register after a CFI
  // instruction has been inserted at Pos. Only directives *below* Pos can
  // shadow the new instruction's effect at block exit, so scan them first.
  //
  // Used for calculating outgoing cfa offset when CFI instruction added at Pos
  // is def_cfa or def_cfa_offset.
  /* For example:
       ...
       .cfi_adjust_cfa_offset 4
       ...
       .cfi_adjust_cfa_offset 4
       ...
       .cfi_def_cfa_offset 16 <---- newly added CFI instruction at Pos
       ...
       .cfi_adjust_cfa_offset 4
       ...
     Once def_cfa_offset is inserted, outgoing cfa offset is no longer
     calculated as incoming offset incremented by the sum of all adjustments
     (12). It becomes equal to the offset set by the added CFI instruction (16)
     incremented by the sum of adjustments below it (4). Adjustments above the
     added def_cfa_offset directive don't have effect below it anymore and
     therefore don't affect the value of outgoing cfa offset.
  */
  // Sum of adjust_cfa_offset amounts found below Pos.
  int AdjustAmount = 0;
  // Whether the new instruction still determines the outgoing cfa offset
  // (only relevant when the inserted instruction is a def_cfa).
  bool ShouldSetOffset = true;
  // Whether the new instruction still determines the outgoing cfa register
  // (only relevant when the inserted instruction is a def_cfa).
  bool ShouldSetRegister = true;
  // Bind by const reference: the previous code copied the entire
  // frame-instruction vector on every call.
  const std::vector<MCCFIInstruction> &CFIInstructions =
      getParent()->getFrameInstructions();
  const MCCFIInstruction &CFI =
      CFIInstructions[Pos->getOperand(0).getCFIIndex()];
  // Type of the CFI instruction that was inserted.
  MCCFIInstruction::OpType CFIType = CFI.getOperation();
  // Walk backwards from the block end to (but not including) Pos, looking
  // for existing directives that shadow the newly inserted one.
  for (MachineBasicBlock::reverse_iterator RI = rbegin();
       RI != Pos.getReverse(); ++RI) {
    if (!RI->isCFIInstruction())
      continue;
    const MCCFIInstruction &RICFI =
        CFIInstructions[RI->getOperand(0).getCFIIndex()];
    switch (RICFI.getOperation()) {
    case MCCFIInstruction::OpAdjustCfaOffset:
      AdjustAmount += RICFI.getOffset();
      break;
    case MCCFIInstruction::OpDefCfaOffset:
      // A def_cfa_offset below Pos overrides any offset the new instruction
      // sets.
      if (CFIType == MCCFIInstruction::OpDefCfaOffset ||
          CFIType == MCCFIInstruction::OpAdjustCfaOffset)
        return;
      if (CFIType == MCCFIInstruction::OpDefCfa) {
        // With both a def_cfa_offset and a def_cfa_register below Pos, the
        // new def_cfa affects nothing at block exit.
        if (!ShouldSetRegister)
          return;
        ShouldSetOffset = false;
      }
      break;
    case MCCFIInstruction::OpDefCfaRegister:
      // A def_cfa_register below Pos overrides any register the new
      // instruction sets.
      if (CFIType == MCCFIInstruction::OpDefCfaRegister)
        return;
      if (CFIType == MCCFIInstruction::OpDefCfa) {
        // See the symmetric case above.
        if (!ShouldSetOffset)
          return;
        ShouldSetRegister = false;
      }
      break;
    case MCCFIInstruction::OpDefCfa:
      // A def_cfa below Pos overrides both the offset and the register of
      // the new instruction.
      if (CFIType == MCCFIInstruction::OpDefCfaRegister ||
          CFIType == MCCFIInstruction::OpDefCfaOffset ||
          CFIType == MCCFIInstruction::OpDefCfa ||
          CFIType == MCCFIInstruction::OpAdjustCfaOffset)
        return;
      break;
    default:
      break;
    }
  }
  // Nothing below Pos fully shadows the new directive; fold its effect into
  // the outgoing CFI state.
  switch (CFIType) {
  case MCCFIInstruction::OpAdjustCfaOffset:
    setOutgoingCFAOffset(getOutgoingCFAOffset() + CFI.getOffset());
    break;
  case MCCFIInstruction::OpDefCfaOffset:
    setOutgoingCFAOffset(CFI.getOffset() + AdjustAmount);
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    setOutgoingCFARegister(CFI.getRegister());
    break;
  case MCCFIInstruction::OpDefCfa:
    if (ShouldSetOffset)
      setOutgoingCFAOffset(CFI.getOffset() + AdjustAmount);
    if (ShouldSetRegister)
      setOutgoingCFARegister(CFI.getRegister());
    break;
  default:
    break;
  }
}
void MachineBasicBlock::updateCFIInfoSucc() {
// Blocks whose successors' CFI info should be updated.
std::queue<MachineBasicBlock *> Successors;
// Keep track of basic blocks that have already been put in the Successors
// queue.
std::set<MachineBasicBlock *> ProcessedMBBs;
// Start with updating CFI info for direct successors of this block.
Successors.push(this);
ProcessedMBBs.insert(this);
// Go through the successors and update their CFI info if needed.
while (!Successors.empty()) {
MachineBasicBlock *CurrSucc = Successors.front();
Successors.pop();
// Update CFI info for CurrSucc's successors.
for (auto Succ : CurrSucc->successors()) {
if (ProcessedMBBs.find(Succ) != ProcessedMBBs.end()) continue;
if (Succ->getIncomingCFAOffset() == CurrSucc->getOutgoingCFAOffset() &&
Succ->getIncomingCFARegister() == CurrSucc->getOutgoingCFARegister())
continue;
bool ChangedOutgoingInfo = false;
// Do not update cfa offset if the existing value matches the new.
if (Succ->getIncomingCFAOffset() != CurrSucc->getOutgoingCFAOffset()) {
// If the block doesn't have a def_cfa_offset or def_cfa directive,
// update its outgoing offset.
if (!Succ->hasDefOffset()) {
// Succ block doesn't set absolute offset, so the difference between
// outgoing and incoming offset remains the same. This difference is
// the sum of offsets set by adjust_cfa_offset directives.
int AdjustAmount =
Succ->getOutgoingCFAOffset() - Succ->getIncomingCFAOffset();
Succ->setOutgoingCFAOffset(CurrSucc->getOutgoingCFAOffset() +
AdjustAmount);
ChangedOutgoingInfo = true;
}
Succ->setIncomingCFAOffset(CurrSucc->getOutgoingCFAOffset());
}
// Do not update cfa register if the existing value matches the new.
if (Succ->getIncomingCFARegister() !=
CurrSucc->getOutgoingCFARegister()) {
Succ->setIncomingCFARegister(CurrSucc->getOutgoingCFARegister());
// If the block doesn't have a def_cfa_register or def_cfa directive,
// update its outgoing register.
if (!Succ->hasDefRegister()) {
Succ->setOutgoingCFARegister(Succ->getIncomingCFARegister());
ChangedOutgoingInfo = true;
}
}
// If Succ's outgoing CFI info has been changed, it's successors should be
// updated as well.
if (ChangedOutgoingInfo) {
Successors.push(Succ);
ProcessedMBBs.insert(Succ);
}
}
}
}
void MachineBasicBlock::recalculateCFIInfo(bool UseExistingIncoming,
int NewIncomingOffset,
unsigned NewIncomingRegister) {
// Outgoing cfa offset set by the block.
int SetOffset;
// Outgoing cfa register set by the block.
unsigned SetRegister;
const std::vector<MCCFIInstruction> &Instrs =
getParent()->getFrameInstructions();
// Set initial values to SetOffset and SetRegister. Use existing incoming
// values or values passed as arguments.
if (!UseExistingIncoming) {
// Set new incoming cfa offset and register values.
setIncomingCFAOffset(NewIncomingOffset);
setIncomingCFARegister(NewIncomingRegister);
}
SetOffset = getIncomingCFAOffset();
SetRegister = getIncomingCFARegister();
setDefOffset(false);
setDefRegister(false);
// Determine cfa offset and register set by the block.
for (MachineBasicBlock::iterator MI = begin(); MI != end(); ++MI) {
if (MI->isCFIInstruction()) {
unsigned CFIIndex = MI->getOperand(0).getCFIIndex();
const MCCFIInstruction &CFI = Instrs[CFIIndex];
if (CFI.getOperation() == MCCFIInstruction::OpDefCfaRegister) {
SetRegister = CFI.getRegister();
setDefRegister(true);
} else if (CFI.getOperation() == MCCFIInstruction::OpDefCfaOffset) {
SetOffset = CFI.getOffset();
setDefOffset(true);
} else if (CFI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) {
SetOffset = SetOffset + CFI.getOffset();
} else if (CFI.getOperation() == MCCFIInstruction::OpDefCfa) {
SetRegister = CFI.getRegister();
SetOffset = CFI.getOffset();
setDefOffset(true);
setDefRegister(true);
}
}
}
// Update outgoing CFI info.
setOutgoingCFAOffset(SetOffset);
setOutgoingCFARegister(SetRegister);
}
void MachineBasicBlock::mergeCFIInfo(MachineBasicBlock *MBB) {
  // Absorb MBB's CFI state after it is merged into this block: the combined
  // block ends with MBB's outgoing CFA offset/register, and it defines an
  // offset (or register) if either of the two blocks did.
  const bool DefinesOffset = hasDefOffset() || MBB->hasDefOffset();
  const bool DefinesRegister = hasDefRegister() || MBB->hasDefRegister();
  setOutgoingCFAOffset(MBB->getOutgoingCFAOffset());
  setOutgoingCFARegister(MBB->getOutgoingCFARegister());
  setDefOffset(DefinesOffset);
  setDefRegister(DefinesRegister);
}

View File

@ -305,8 +305,33 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
} }
case MachineOperand::MO_MCSymbol: case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol(); return getMCSymbol() == Other.getMCSymbol();
case MachineOperand::MO_CFIIndex: case MachineOperand::MO_CFIIndex: {
return getCFIIndex() == Other.getCFIIndex(); const MachineFunction *MF = getParent()->getParent()->getParent();
const MachineFunction *OtherMF =
Other.getParent()->getParent()->getParent();
MCCFIInstruction Inst = MF->getFrameInstructions()[getCFIIndex()];
MCCFIInstruction OtherInst =
OtherMF->getFrameInstructions()[Other.getCFIIndex()];
MCCFIInstruction::OpType op = Inst.getOperation();
if (op != OtherInst.getOperation()) return false;
if (op == MCCFIInstruction::OpDefCfa || op == MCCFIInstruction::OpOffset ||
op == MCCFIInstruction::OpRestore ||
op == MCCFIInstruction::OpUndefined ||
op == MCCFIInstruction::OpSameValue ||
op == MCCFIInstruction::OpDefCfaRegister ||
op == MCCFIInstruction::OpRelOffset ||
op == MCCFIInstruction::OpRegister)
if (Inst.getRegister() != OtherInst.getRegister()) return false;
if (op == MCCFIInstruction::OpRegister)
if (Inst.getRegister2() != OtherInst.getRegister2()) return false;
if (op == MCCFIInstruction::OpDefCfa || op == MCCFIInstruction::OpOffset ||
op == MCCFIInstruction::OpRelOffset ||
op == MCCFIInstruction::OpDefCfaOffset ||
op == MCCFIInstruction::OpAdjustCfaOffset ||
op == MCCFIInstruction::OpGnuArgsSize)
if (Inst.getOffset() != OtherInst.getOffset()) return false;
return true;
}
case MachineOperand::MO_Metadata: case MachineOperand::MO_Metadata:
return getMetadata() == Other.getMetadata(); return getMetadata() == Other.getMetadata();
case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_IntrinsicID:
@ -355,8 +380,13 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
case MachineOperand::MO_MCSymbol: case MachineOperand::MO_MCSymbol:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
case MachineOperand::MO_CFIIndex: case MachineOperand::MO_CFIIndex: {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); const MachineFunction *MF = MO.getParent()->getParent()->getParent();
MCCFIInstruction Inst = MF->getFrameInstructions()[MO.getCFIIndex()];
return hash_combine(MO.getType(), MO.getTargetFlags(), Inst.getOperation(),
Inst.getRegister(), Inst.getRegister2(),
Inst.getOffset());
}
case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_IntrinsicID:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
case MachineOperand::MO_Predicate: case MachineOperand::MO_Predicate:

View File

@ -977,6 +977,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
void PEI::insertPrologEpilogCode(MachineFunction &Fn) { void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
// Set initial incoming and outgoing cfa offset and register values for basic
// blocks.
TFI.initializeCFIInfo(Fn);
// Add prologue to the function... // Add prologue to the function...
for (MachineBasicBlock *SaveBlock : SaveBlocks) for (MachineBasicBlock *SaveBlock : SaveBlocks)
TFI.emitPrologue(Fn, *SaveBlock); TFI.emitPrologue(Fn, *SaveBlock);

View File

@ -604,7 +604,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (PreRegAlloc && MI.isCall()) if (PreRegAlloc && MI.isCall())
return false; return false;
if (!MI.isPHI() && !MI.isDebugValue()) if (!MI.isPHI() && !MI.isDirective())
InstrCount += 1; InstrCount += 1;
if (InstrCount > MaxDuplicateCount) if (InstrCount > MaxDuplicateCount)
@ -857,6 +857,9 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
for (MachineBasicBlock *Succ : TailBB->successors()) for (MachineBasicBlock *Succ : TailBB->successors())
PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ));
// Update the CFI info for PredBB.
PredBB->mergeCFIInfo(TailBB);
Changed = true; Changed = true;
++NumTailDups; ++NumTailDups;
} }
@ -917,6 +920,9 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
PrevBB->transferSuccessors(TailBB); PrevBB->transferSuccessors(TailBB);
TDBBs.push_back(PrevBB); TDBBs.push_back(PrevBB);
Changed = true; Changed = true;
// Update the CFI info for PrevBB.
PrevBB->mergeCFIInfo(TailBB);
} }
// If this is after register allocation, there are no phis to fix. // If this is after register allocation, there are no phis to fix.

View File

@ -769,8 +769,15 @@ void TargetPassConfig::addMachinePasses() {
if (getOptLevel() != CodeGenOpt::None) if (getOptLevel() != CodeGenOpt::None)
addBlockPlacement(); addBlockPlacement();
// Verify basic block incoming and outgoing cfa offset and register values.
addPass(createCFIInfoVerifier());
addPreEmitPass(); addPreEmitPass();
// Correct CFA calculation rule where needed by inserting appropriate CFI
// instructions.
addPass(createCFIInstrInserter(), false);
if (TM->Options.EnableIPRA) if (TM->Options.EnableIPRA)
// Collect register usage information and produce a register mask of // Collect register usage information and produce a register mask of
// clobbered registers, to be used to optimize call sites. // clobbered registers, to be used to optimize call sites.

View File

@ -234,6 +234,12 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size"); assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
Log2SlotSize = Log2_32(SlotSize); Log2SlotSize = Log2_32(SlotSize);
// Set initial incoming and outgoing cfa offset and register values for basic
// blocks. This is done here because this pass runs before PEI and can insert
// CFI instructions.
// TODO: Find a better solution to this problem.
TFL->initializeCFIInfo(MF);
if (skipFunction(*MF.getFunction()) || !isLegal(MF)) if (skipFunction(*MF.getFunction()) || !isLegal(MF))
return false; return false;
@ -536,11 +542,13 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
// For debugging, when using SP-based CFA, we need to adjust the CFA // For debugging, when using SP-based CFA, we need to adjust the CFA
// offset after each push. // offset after each push.
// TODO: This is needed only if we require precise CFA. // TODO: This is needed only if we require precise CFA.
if (!TFL->hasFP(MF)) if (!TFL->hasFP(MF)) {
TFL->BuildCFI( TFL->BuildCFI(MBB, std::next(Push), DL,
MBB, std::next(Push), DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize)); MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));
// Update the CFI information for MBB and it's successors.
MBB.updateCFIInfo(std::next(Push));
MBB.updateCFIInfoSucc();
}
MBB.erase(MOV); MBB.erase(MOV);
} }

View File

@ -958,6 +958,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
? getX86SubSuperRegister(FramePtr, 64) : FramePtr; ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
unsigned BasePtr = TRI->getBaseRegister(); unsigned BasePtr = TRI->getBaseRegister();
bool HasWinCFI = false; bool HasWinCFI = false;
bool InsertedCFI = false;
// Debug location must be unknown since the first debug location is used // Debug location must be unknown since the first debug location is used
// to determine the end of the prologue. // to determine the end of the prologue.
@ -1093,6 +1094,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
assert(StackSize); assert(StackSize);
BuildCFI(MBB, MBBI, DL, BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth)); MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
MBB.setDefOffset(true);
MBB.updateCFIInfo(std::prev(MBBI));
InsertedCFI = true;
// Change the rule for the FramePtr to be an "offset" rule. // Change the rule for the FramePtr to be an "offset" rule.
unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
@ -1121,6 +1125,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister( BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister(
nullptr, DwarfFramePtr)); nullptr, DwarfFramePtr));
MBB.setDefRegister(true);
MBB.updateCFIInfo(std::prev(MBBI));
InsertedCFI = true;
} }
} }
} else { } else {
@ -1152,6 +1159,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
assert(StackSize); assert(StackSize);
BuildCFI(MBB, MBBI, DL, BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset)); MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
MBB.setDefOffset(true);
MBB.updateCFIInfo(std::prev(MBBI));
InsertedCFI = true;
StackOffset += stackGrowth; StackOffset += stackGrowth;
} }
@ -1417,6 +1427,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
assert(StackSize); assert(StackSize);
BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset( BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
nullptr, -StackSize + stackGrowth)); nullptr, -StackSize + stackGrowth));
MBB.setDefOffset(true);
MBB.updateCFIInfo(std::prev(MBBI));
InsertedCFI = true;
} }
// Emit DWARF info specifying the offsets of the callee-saved registers. // Emit DWARF info specifying the offsets of the callee-saved registers.
@ -1438,6 +1451,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// At this point we know if the function has WinCFI or not. // At this point we know if the function has WinCFI or not.
MF.setHasWinCFI(HasWinCFI); MF.setHasWinCFI(HasWinCFI);
if (InsertedCFI)
MBB.updateCFIInfoSucc();
} }
bool X86FrameLowering::canUseLEAForSPInEpilogue( bool X86FrameLowering::canUseLEAForSPInEpilogue(
@ -1548,6 +1564,12 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned CSSize = X86FI->getCalleeSavedFrameSize(); unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0; uint64_t NumBytes = 0;
bool NeedsDwarfCFI = (MF.getMMI().hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry()) &&
(!MF.getSubtarget<X86Subtarget>().isTargetDarwin() &&
!MF.getSubtarget<X86Subtarget>().isOSWindows());
bool InsertedCFI = false;
if (RetOpcode && *RetOpcode == X86::CATCHRET) { if (RetOpcode && *RetOpcode == X86::CATCHRET) {
// SEH shouldn't use catchret. // SEH shouldn't use catchret.
assert(!isAsynchronousEHPersonality( assert(!isAsynchronousEHPersonality(
@ -1582,6 +1604,17 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr)
.setMIFlag(MachineInstr::FrameDestroy); .setMIFlag(MachineInstr::FrameDestroy);
if (NeedsDwarfCFI) {
unsigned DwarfStackPtr =
TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
nullptr, DwarfStackPtr, -SlotSize));
--MBBI;
MBB.setDefOffset(true);
MBB.setDefRegister(true);
MBB.updateCFIInfo(MBBI);
InsertedCFI = true;
}
} else { } else {
NumBytes = StackSize - CSSize; NumBytes = StackSize - CSSize;
} }
@ -1666,6 +1699,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
} else if (NumBytes) { } else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes. // Adjust stack pointer back: ESP += numbytes.
emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true); emitSPUpdate(MBB, MBBI, NumBytes, /*InEpilogue=*/true);
if (!hasFP(MF) && NeedsDwarfCFI) {
// Define the current CFA rule to use the provided offset.
BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
nullptr, -CSSize - SlotSize));
MBB.setDefOffset(true);
MBB.updateCFIInfo(std::prev(MBBI));
InsertedCFI = true;
}
--MBBI; --MBBI;
} }
@ -1678,6 +1719,26 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (NeedsWinCFI && MF.hasWinCFI()) if (NeedsWinCFI && MF.hasWinCFI())
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
if (!hasFP(MF) && NeedsDwarfCFI) {
MBBI = FirstCSPop;
int64_t Offset = -CSSize - SlotSize;
// Mark callee-saved pop instruction.
// Define the current CFA rule to use the provided offset.
while (MBBI != MBB.end()) {
MachineBasicBlock::iterator PI = MBBI;
unsigned Opc = PI->getOpcode();
++MBBI;
if (Opc == X86::POP32r || Opc == X86::POP64r) {
Offset += SlotSize;
BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
MBB.setDefOffset(true);
MBB.updateCFIInfo(std::prev(MBBI));
InsertedCFI = true;
}
}
}
if (!RetOpcode || !isTailCallOpcode(*RetOpcode)) { if (!RetOpcode || !isTailCallOpcode(*RetOpcode)) {
// Add the return addr area delta back since we are not tail calling. // Add the return addr area delta back since we are not tail calling.
int Offset = -1 * X86FI->getTCReturnAddrDelta(); int Offset = -1 * X86FI->getTCReturnAddrDelta();
@ -1690,6 +1751,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true); emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true);
} }
} }
if (InsertedCFI)
MBB.updateCFIInfoSucc();
} }
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
@ -2364,6 +2428,19 @@ void X86FrameLowering::adjustForSegmentedStacks(
checkMBB->addSuccessor(allocMBB); checkMBB->addSuccessor(allocMBB);
checkMBB->addSuccessor(&PrologueMBB); checkMBB->addSuccessor(&PrologueMBB);
int InitialOffset = TRI->getSlotSize();
unsigned InitialRegister = TRI->getDwarfRegNum(StackPtr, true);
// Set CFI info for checkMBB.
checkMBB->setIncomingCFAOffset(InitialOffset);
checkMBB->setIncomingCFARegister(InitialRegister);
checkMBB->setOutgoingCFAOffset(InitialOffset);
checkMBB->setOutgoingCFARegister(InitialRegister);
// Set CFI info for allocMBB.
allocMBB->setIncomingCFAOffset(InitialOffset);
allocMBB->setIncomingCFARegister(InitialRegister);
allocMBB->setOutgoingCFAOffset(InitialOffset);
allocMBB->setOutgoingCFARegister(InitialRegister);
#ifdef EXPENSIVE_CHECKS #ifdef EXPENSIVE_CHECKS
MF.verify(); MF.verify();
#endif #endif
@ -2535,6 +2612,19 @@ void X86FrameLowering::adjustForHiPEPrologue(
stackCheckMBB->addSuccessor(incStackMBB, {1, 100}); stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
incStackMBB->addSuccessor(&PrologueMBB, {99, 100}); incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
incStackMBB->addSuccessor(incStackMBB, {1, 100}); incStackMBB->addSuccessor(incStackMBB, {1, 100});
int InitialOffset = TRI->getSlotSize();
unsigned InitialRegister = TRI->getDwarfRegNum(StackPtr, true);
// Set CFI info to stackCheckMBB.
stackCheckMBB->setIncomingCFAOffset(InitialOffset);
stackCheckMBB->setIncomingCFARegister(InitialRegister);
stackCheckMBB->setOutgoingCFAOffset(InitialOffset);
stackCheckMBB->setOutgoingCFARegister(InitialRegister);
// Set CFI info to incStackMBB.
incStackMBB->setIncomingCFAOffset(InitialOffset);
incStackMBB->setIncomingCFARegister(InitialRegister);
incStackMBB->setOutgoingCFAOffset(InitialOffset);
incStackMBB->setOutgoingCFARegister(InitialRegister);
} }
#ifdef EXPENSIVE_CHECKS #ifdef EXPENSIVE_CHECKS
MF.verify(); MF.verify();
@ -2640,6 +2730,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool DwarfCFI = !WindowsCFI && bool DwarfCFI = !WindowsCFI &&
(MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
bool InsertedCFI = false;
// If we have any exception handlers in this function, and we adjust // If we have any exception handlers in this function, and we adjust
// the SP before calls, we may need to indicate this to the unwinder // the SP before calls, we may need to indicate this to the unwinder
@ -2665,10 +2756,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// TODO: This is needed only if we require precise CFA. // TODO: This is needed only if we require precise CFA.
// If this is a callee-pop calling convention, emit a CFA adjust for // If this is a callee-pop calling convention, emit a CFA adjust for
// the amount the callee popped. // the amount the callee popped.
if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) {
BuildCFI(MBB, InsertPos, DL, BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
MBB.updateCFIInfo(std::prev(InsertPos));
InsertedCFI = true;
}
// Add Amount to SP to destroy a frame, or subtract to setup. // Add Amount to SP to destroy a frame, or subtract to setup.
int64_t StackAdjustment = isDestroy ? Amount : -Amount; int64_t StackAdjustment = isDestroy ? Amount : -Amount;
int64_t CfaAdjustment = -StackAdjustment; int64_t CfaAdjustment = -StackAdjustment;
@ -2702,9 +2795,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
BuildCFI(MBB, InsertPos, DL, BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, MCCFIInstruction::createAdjustCfaOffset(nullptr,
CfaAdjustment)); CfaAdjustment));
MBB.updateCFIInfo(std::prev(InsertPos));
InsertedCFI = true;
} }
} }
if (InsertedCFI) MBB.updateCFIInfoSucc();
return I; return I;
} }
@ -2826,6 +2923,22 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
return MBBI; return MBBI;
} }
void X86FrameLowering::initializeCFIInfo(MachineFunction &MF) const {
int InitialOffset = TRI->getSlotSize();
unsigned InitialRegister = TRI->getDwarfRegNum(StackPtr, true);
// Initialize CFI info if it hasn't already been initialized.
for (auto &MBB : MF) {
if (MBB.getIncomingCFAOffset() == -1)
MBB.setIncomingCFAOffset(InitialOffset);
if (MBB.getOutgoingCFAOffset() == -1)
MBB.setOutgoingCFAOffset(InitialOffset);
if (MBB.getIncomingCFARegister() == 0)
MBB.setIncomingCFARegister(InitialRegister);
if (MBB.getOutgoingCFARegister() == 0)
MBB.setOutgoingCFARegister(InitialRegister);
}
}
namespace { namespace {
// Struct used by orderFrameObjects to help sort the stack objects. // Struct used by orderFrameObjects to help sort the stack objects.
struct X86FrameSortingObject { struct X86FrameSortingObject {

View File

@ -177,6 +177,8 @@ public:
MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool RestoreSP = false) const; const DebugLoc &DL, bool RestoreSP = false) const;
void initializeCFIInfo(MachineFunction &MF) const override;
private: private:
uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;

View File

@ -23,6 +23,8 @@ lpad: ; preds = %cont, %entry
} }
; CHECK: lpad ; CHECK: lpad
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: Ltmp ; CHECK-NEXT: Ltmp
declare i32 @__gxx_personality_v0(...) declare i32 @__gxx_personality_v0(...)

View File

@ -88,6 +88,8 @@ define void @full_test() {
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; X32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: addl $60, %esp ; X32-NEXT: addl $60, %esp
; X32-NEXT: .Lcfi1:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: full_test: ; X64-LABEL: full_test:

View File

@ -23,6 +23,8 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
; X32-NEXT: addl 8(%ebp), %eax ; X32-NEXT: addl 8(%ebp), %eax
; X32-NEXT: adcl 12(%ebp), %edx ; X32-NEXT: adcl 12(%ebp), %edx
; X32-NEXT: popl %ebp ; X32-NEXT: popl %ebp
; X32-NEXT: .Lcfi3:
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl ; X32-NEXT: retl
%ret = add i64 %arg1, %arg2 %ret = add i64 %arg1, %arg2
ret i64 %ret ret i64 %ret

View File

@ -19,6 +19,8 @@ define i32* @allocai32() {
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movl %esp, %eax ; X32-NEXT: movl %esp, %eax
; X32-NEXT: popl %ecx ; X32-NEXT: popl %ecx
; X32-NEXT: .Lcfi1:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X32ABI-LABEL: allocai32: ; X32ABI-LABEL: allocai32:

View File

@ -46,7 +46,9 @@
; CHECK-NEXT: Post-RA pseudo instruction expansion pass ; CHECK-NEXT: Post-RA pseudo instruction expansion pass
; CHECK-NEXT: X86 pseudo instruction expansion pass ; CHECK-NEXT: X86 pseudo instruction expansion pass
; CHECK-NEXT: Analyze Machine Code For Garbage Collection ; CHECK-NEXT: Analyze Machine Code For Garbage Collection
; CHECK-NEXT: Verify that corresponding in/out CFI info matches
; CHECK-NEXT: X86 vzeroupper inserter ; CHECK-NEXT: X86 vzeroupper inserter
; CHECK-NEXT: CFI Instruction Inserter
; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Contiguously Lay Out Funclets
; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: StackMap Liveness Analysis
; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis

View File

@ -591,6 +591,8 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) {
; AVX1-NEXT: vmovups %ymm0, (%rax) ; AVX1-NEXT: vmovups %ymm0, (%rax)
; AVX1-NEXT: vmovups %ymm1, (%rax) ; AVX1-NEXT: vmovups %ymm1, (%rax)
; AVX1-NEXT: addq $24, %rsp ; AVX1-NEXT: addq $24, %rsp
; AVX1-NEXT: .Lcfi1:
; AVX1-NEXT: .cfi_def_cfa_offset 8
; AVX1-NEXT: vzeroupper ; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq ; AVX1-NEXT: retq
; ;

View File

@ -414,6 +414,8 @@ define <16 x float> @broadcast_ss_spill(float %x) {
; ALL-NEXT: callq func_f32 ; ALL-NEXT: callq func_f32
; ALL-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload ; ALL-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload
; ALL-NEXT: addq $24, %rsp ; ALL-NEXT: addq $24, %rsp
; ALL-NEXT: .Lcfi1:
; ALL-NEXT: .cfi_def_cfa_offset 8
; ALL-NEXT: retq ; ALL-NEXT: retq
%a = fadd float %x, %x %a = fadd float %x, %x
call void @func_f32(float %a) call void @func_f32(float %a)
@ -427,13 +429,15 @@ define <8 x double> @broadcast_sd_spill(double %x) {
; ALL-LABEL: broadcast_sd_spill: ; ALL-LABEL: broadcast_sd_spill:
; ALL: # BB#0: ; ALL: # BB#0:
; ALL-NEXT: subq $24, %rsp ; ALL-NEXT: subq $24, %rsp
; ALL-NEXT: .Lcfi1: ; ALL-NEXT: .Lcfi2:
; ALL-NEXT: .cfi_def_cfa_offset 32 ; ALL-NEXT: .cfi_def_cfa_offset 32
; ALL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 ; ALL-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; ALL-NEXT: callq func_f64 ; ALL-NEXT: callq func_f64
; ALL-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload ; ALL-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload
; ALL-NEXT: addq $24, %rsp ; ALL-NEXT: addq $24, %rsp
; ALL-NEXT: .Lcfi3:
; ALL-NEXT: .cfi_def_cfa_offset 8
; ALL-NEXT: retq ; ALL-NEXT: retq
%a = fadd double %x, %x %a = fadd double %x, %x
call void @func_f64(double %a) call void @func_f64(double %a)

View File

@ -289,6 +289,8 @@ define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp ; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: .Lcfi1:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
ret i64 %res ret i64 %res
@ -305,7 +307,7 @@ define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512F-32-LABEL: test_mask_pcmpeq_b: ; AVX512F-32-LABEL: test_mask_pcmpeq_b:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp ; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Lcfi1: ; AVX512F-32-NEXT: .Lcfi2:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
@ -313,6 +315,8 @@ define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp ; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: .Lcfi3:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res ret i64 %res
@ -366,13 +370,15 @@ define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512F-32-LABEL: test_pcmpgt_b: ; AVX512F-32-LABEL: test_pcmpgt_b:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp ; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Lcfi2: ; AVX512F-32-NEXT: .Lcfi4:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp ; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: .Lcfi5:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
ret i64 %res ret i64 %res
@ -389,7 +395,7 @@ define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512F-32-LABEL: test_mask_pcmpgt_b: ; AVX512F-32-LABEL: test_mask_pcmpgt_b:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp ; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Lcfi3: ; AVX512F-32-NEXT: .Lcfi6:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
@ -397,6 +403,8 @@ define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp ; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: .Lcfi7:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res ret i64 %res
@ -1593,7 +1601,7 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-LABEL: test_cmp_b_512: ; AVX512F-32-LABEL: test_cmp_b_512:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $60, %esp ; AVX512F-32-NEXT: subl $60, %esp
; AVX512F-32-NEXT: .Lcfi4: ; AVX512F-32-NEXT: .Lcfi8:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 64 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 64
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
@ -1624,6 +1632,8 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $60, %esp ; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: .Lcfi9:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
%res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
@ -1673,17 +1683,17 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-LABEL: test_mask_cmp_b_512: ; AVX512F-32-LABEL: test_mask_cmp_b_512:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: pushl %ebx ; AVX512F-32-NEXT: pushl %ebx
; AVX512F-32-NEXT: .Lcfi5: ; AVX512F-32-NEXT: .Lcfi10:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: pushl %esi ; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: .Lcfi6: ; AVX512F-32-NEXT: .Lcfi11:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
; AVX512F-32-NEXT: subl $60, %esp ; AVX512F-32-NEXT: subl $60, %esp
; AVX512F-32-NEXT: .Lcfi7: ; AVX512F-32-NEXT: .Lcfi12:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: .Lcfi8: ; AVX512F-32-NEXT: .Lcfi13:
; AVX512F-32-NEXT: .cfi_offset %esi, -12 ; AVX512F-32-NEXT: .cfi_offset %esi, -12
; AVX512F-32-NEXT: .Lcfi9: ; AVX512F-32-NEXT: .Lcfi14:
; AVX512F-32-NEXT: .cfi_offset %ebx, -8 ; AVX512F-32-NEXT: .cfi_offset %ebx, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: movb %cl, %al
@ -2426,8 +2436,14 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: addl %esi, %eax ; AVX512F-32-NEXT: addl %esi, %eax
; AVX512F-32-NEXT: adcxl %ecx, %edx ; AVX512F-32-NEXT: adcxl %ecx, %edx
; AVX512F-32-NEXT: addl $60, %esp ; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: .Lcfi15:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
; AVX512F-32-NEXT: popl %esi ; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: .Lcfi16:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: popl %ebx ; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: .Lcfi17:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
%res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
@ -2477,7 +2493,7 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-LABEL: test_ucmp_b_512: ; AVX512F-32-LABEL: test_ucmp_b_512:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $60, %esp ; AVX512F-32-NEXT: subl $60, %esp
; AVX512F-32-NEXT: .Lcfi10: ; AVX512F-32-NEXT: .Lcfi18:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 64 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 64
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
@ -2508,6 +2524,8 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $60, %esp ; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: .Lcfi19:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
%res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
@ -2557,17 +2575,17 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512: ; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: pushl %ebx ; AVX512F-32-NEXT: pushl %ebx
; AVX512F-32-NEXT: .Lcfi11: ; AVX512F-32-NEXT: .Lcfi20:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: pushl %esi ; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: .Lcfi12: ; AVX512F-32-NEXT: .Lcfi21:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
; AVX512F-32-NEXT: subl $60, %esp ; AVX512F-32-NEXT: subl $60, %esp
; AVX512F-32-NEXT: .Lcfi13: ; AVX512F-32-NEXT: .Lcfi22:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: .Lcfi14: ; AVX512F-32-NEXT: .Lcfi23:
; AVX512F-32-NEXT: .cfi_offset %esi, -12 ; AVX512F-32-NEXT: .cfi_offset %esi, -12
; AVX512F-32-NEXT: .Lcfi15: ; AVX512F-32-NEXT: .Lcfi24:
; AVX512F-32-NEXT: .cfi_offset %ebx, -8 ; AVX512F-32-NEXT: .cfi_offset %ebx, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: movb %cl, %al
@ -3310,8 +3328,14 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: addl %esi, %eax ; AVX512F-32-NEXT: addl %esi, %eax
; AVX512F-32-NEXT: adcxl %ecx, %edx ; AVX512F-32-NEXT: adcxl %ecx, %edx
; AVX512F-32-NEXT: addl $60, %esp ; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: .Lcfi25:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
; AVX512F-32-NEXT: popl %esi ; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: .Lcfi26:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: popl %ebx ; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: .Lcfi27:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
%res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)

View File

@ -1600,6 +1600,8 @@ define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp ; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: .Lcfi1:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1) %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
ret i64 %res ret i64 %res
@ -1617,13 +1619,15 @@ define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512: ; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp ; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Lcfi1: ; AVX512F-32-NEXT: .Lcfi2:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp ; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: .Lcfi3:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
ret i64 %res ret i64 %res
@ -1801,7 +1805,7 @@ define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x
; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512: ; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $20, %esp ; AVX512F-32-NEXT: subl $20, %esp
; AVX512F-32-NEXT: .Lcfi2: ; AVX512F-32-NEXT: .Lcfi4:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
@ -1815,6 +1819,8 @@ define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $20, %esp ; AVX512F-32-NEXT: addl $20, %esp
; AVX512F-32-NEXT: .Lcfi5:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
%res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
@ -1866,7 +1872,7 @@ define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %
; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512: ; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $20, %esp ; AVX512F-32-NEXT: subl $20, %esp
; AVX512F-32-NEXT: .Lcfi3: ; AVX512F-32-NEXT: .Lcfi6:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
@ -1880,6 +1886,8 @@ define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $20, %esp ; AVX512F-32-NEXT: addl $20, %esp
; AVX512F-32-NEXT: .Lcfi7:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 4
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
%res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)

View File

@ -33,6 +33,8 @@ define <2 x i64> @test_mm_mask_broadcastd_epi32(<2 x i64> %a0, i8 %a1, <2 x i64>
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1} ; X32-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi1:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_broadcastd_epi32: ; X64-LABEL: test_mm_mask_broadcastd_epi32:
@ -57,7 +59,7 @@ define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastd_epi32: ; X32-LABEL: test_mm_maskz_broadcastd_epi32:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi1: ; X32-NEXT: .Lcfi2:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -66,6 +68,8 @@ define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} ; X32-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi3:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_broadcastd_epi32: ; X64-LABEL: test_mm_maskz_broadcastd_epi32:
@ -162,7 +166,7 @@ define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %a0, i8 %a1, <2 x i64>
; X32-LABEL: test_mm_mask_broadcastq_epi64: ; X32-LABEL: test_mm_mask_broadcastq_epi64:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi2: ; X32-NEXT: .Lcfi4:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al ; X32-NEXT: andb $3, %al
@ -171,6 +175,8 @@ define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %a0, i8 %a1, <2 x i64>
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} ; X32-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi5:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_broadcastq_epi64: ; X64-LABEL: test_mm_mask_broadcastq_epi64:
@ -192,7 +198,7 @@ define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastq_epi64: ; X32-LABEL: test_mm_maskz_broadcastq_epi64:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi3: ; X32-NEXT: .Lcfi6:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al ; X32-NEXT: andb $3, %al
@ -201,6 +207,8 @@ define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} ; X32-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi7:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_broadcastq_epi64: ; X64-LABEL: test_mm_maskz_broadcastq_epi64:
@ -236,7 +244,7 @@ define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %a0, i8 %a1, <2 x i
; X32-LABEL: test_mm256_mask_broadcastq_epi64: ; X32-LABEL: test_mm256_mask_broadcastq_epi64:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi4: ; X32-NEXT: .Lcfi8:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -245,6 +253,8 @@ define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %a0, i8 %a1, <2 x i
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} ; X32-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi9:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_mask_broadcastq_epi64: ; X64-LABEL: test_mm256_mask_broadcastq_epi64:
@ -266,7 +276,7 @@ define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastq_epi64: ; X32-LABEL: test_mm256_maskz_broadcastq_epi64:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi5: ; X32-NEXT: .Lcfi10:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -275,6 +285,8 @@ define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} ; X32-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi11:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_maskz_broadcastq_epi64: ; X64-LABEL: test_mm256_maskz_broadcastq_epi64:
@ -310,7 +322,7 @@ define <2 x double> @test_mm_mask_broadcastsd_pd(<2 x double> %a0, i8 %a1, <2 x
; X32-LABEL: test_mm_mask_broadcastsd_pd: ; X32-LABEL: test_mm_mask_broadcastsd_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi6: ; X32-NEXT: .Lcfi12:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al ; X32-NEXT: andb $3, %al
@ -319,6 +331,8 @@ define <2 x double> @test_mm_mask_broadcastsd_pd(<2 x double> %a0, i8 %a1, <2 x
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi13:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_broadcastsd_pd: ; X64-LABEL: test_mm_mask_broadcastsd_pd:
@ -340,7 +354,7 @@ define <2 x double> @test_mm_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_maskz_broadcastsd_pd: ; X32-LABEL: test_mm_maskz_broadcastsd_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi7: ; X32-NEXT: .Lcfi14:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al ; X32-NEXT: andb $3, %al
@ -349,6 +363,8 @@ define <2 x double> @test_mm_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi15:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_broadcastsd_pd: ; X64-LABEL: test_mm_maskz_broadcastsd_pd:
@ -384,7 +400,7 @@ define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %a0, i8 %a1, <2
; X32-LABEL: test_mm256_mask_broadcastsd_pd: ; X32-LABEL: test_mm256_mask_broadcastsd_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi8: ; X32-NEXT: .Lcfi16:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -393,6 +409,8 @@ define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %a0, i8 %a1, <2
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} ; X32-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi17:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_mask_broadcastsd_pd: ; X64-LABEL: test_mm256_mask_broadcastsd_pd:
@ -414,7 +432,7 @@ define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastsd_pd: ; X32-LABEL: test_mm256_maskz_broadcastsd_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi9: ; X32-NEXT: .Lcfi18:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -423,6 +441,8 @@ define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; X32-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi19:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_maskz_broadcastsd_pd: ; X64-LABEL: test_mm256_maskz_broadcastsd_pd:
@ -458,7 +478,7 @@ define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %a0, i8 %a1, <4 x fl
; X32-LABEL: test_mm_mask_broadcastss_ps: ; X32-LABEL: test_mm_mask_broadcastss_ps:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi10: ; X32-NEXT: .Lcfi20:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -467,6 +487,8 @@ define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %a0, i8 %a1, <4 x fl
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastss %xmm1, %xmm0 {%k1} ; X32-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi21:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_broadcastss_ps: ; X64-LABEL: test_mm_mask_broadcastss_ps:
@ -488,7 +510,7 @@ define <4 x float> @test_mm_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_broadcastss_ps: ; X32-LABEL: test_mm_maskz_broadcastss_ps:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi11: ; X32-NEXT: .Lcfi22:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -497,6 +519,8 @@ define <4 x float> @test_mm_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; X32-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi23:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_broadcastss_ps: ; X64-LABEL: test_mm_maskz_broadcastss_ps:
@ -584,7 +608,7 @@ define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x doub
; X32-LABEL: test_mm_mask_movddup_pd: ; X32-LABEL: test_mm_mask_movddup_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi12: ; X32-NEXT: .Lcfi24:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al ; X32-NEXT: andb $3, %al
@ -593,6 +617,8 @@ define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x doub
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi25:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_movddup_pd: ; X64-LABEL: test_mm_mask_movddup_pd:
@ -614,7 +640,7 @@ define <2 x double> @test_mm_maskz_movddup_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_maskz_movddup_pd: ; X32-LABEL: test_mm_maskz_movddup_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi13: ; X32-NEXT: .Lcfi26:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al ; X32-NEXT: andb $3, %al
@ -623,6 +649,8 @@ define <2 x double> @test_mm_maskz_movddup_pd(i8 %a0, <2 x double> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi27:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_movddup_pd: ; X64-LABEL: test_mm_maskz_movddup_pd:
@ -658,7 +686,7 @@ define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x d
; X32-LABEL: test_mm256_mask_movddup_pd: ; X32-LABEL: test_mm256_mask_movddup_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi14: ; X32-NEXT: .Lcfi28:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -667,6 +695,8 @@ define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x d
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2] ; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi29:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_mask_movddup_pd: ; X64-LABEL: test_mm256_mask_movddup_pd:
@ -688,7 +718,7 @@ define <4 x double> @test_mm256_maskz_movddup_pd(i8 %a0, <4 x double> %a1) {
; X32-LABEL: test_mm256_maskz_movddup_pd: ; X32-LABEL: test_mm256_maskz_movddup_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi15: ; X32-NEXT: .Lcfi30:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -697,6 +727,8 @@ define <4 x double> @test_mm256_maskz_movddup_pd(i8 %a0, <4 x double> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi31:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_maskz_movddup_pd: ; X64-LABEL: test_mm256_maskz_movddup_pd:
@ -732,7 +764,7 @@ define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float
; X32-LABEL: test_mm_mask_movehdup_ps: ; X32-LABEL: test_mm_mask_movehdup_ps:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi16: ; X32-NEXT: .Lcfi32:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -741,6 +773,8 @@ define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3] ; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi33:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_movehdup_ps: ; X64-LABEL: test_mm_mask_movehdup_ps:
@ -762,7 +796,7 @@ define <4 x float> @test_mm_maskz_movehdup_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_movehdup_ps: ; X32-LABEL: test_mm_maskz_movehdup_ps:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi17: ; X32-NEXT: .Lcfi34:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -771,6 +805,8 @@ define <4 x float> @test_mm_maskz_movehdup_ps(i8 %a0, <4 x float> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi35:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_movehdup_ps: ; X64-LABEL: test_mm_maskz_movehdup_ps:
@ -858,7 +894,7 @@ define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float
; X32-LABEL: test_mm_mask_moveldup_ps: ; X32-LABEL: test_mm_mask_moveldup_ps:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi18: ; X32-NEXT: .Lcfi36:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -867,6 +903,8 @@ define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2] ; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi37:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_moveldup_ps: ; X64-LABEL: test_mm_mask_moveldup_ps:
@ -888,7 +926,7 @@ define <4 x float> @test_mm_maskz_moveldup_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_moveldup_ps: ; X32-LABEL: test_mm_maskz_moveldup_ps:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi19: ; X32-NEXT: .Lcfi38:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -897,6 +935,8 @@ define <4 x float> @test_mm_maskz_moveldup_ps(i8 %a0, <4 x float> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi39:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_moveldup_ps: ; X64-LABEL: test_mm_maskz_moveldup_ps:
@ -984,7 +1024,7 @@ define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64
; X32-LABEL: test_mm256_mask_permutex_epi64: ; X32-LABEL: test_mm256_mask_permutex_epi64:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi20: ; X32-NEXT: .Lcfi40:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -993,6 +1033,8 @@ define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] ; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi41:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_mask_permutex_epi64: ; X64-LABEL: test_mm256_mask_permutex_epi64:
@ -1014,7 +1056,7 @@ define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_permutex_epi64: ; X32-LABEL: test_mm256_maskz_permutex_epi64:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi21: ; X32-NEXT: .Lcfi42:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -1023,6 +1065,8 @@ define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 %a0, <4 x i64> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0] ; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi43:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_maskz_permutex_epi64: ; X64-LABEL: test_mm256_maskz_permutex_epi64:
@ -1058,7 +1102,7 @@ define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x
; X32-LABEL: test_mm256_mask_permutex_pd: ; X32-LABEL: test_mm256_mask_permutex_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi22: ; X32-NEXT: .Lcfi44:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -1067,6 +1111,8 @@ define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] ; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi45:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_mask_permutex_pd: ; X64-LABEL: test_mm256_mask_permutex_pd:
@ -1088,7 +1134,7 @@ define <4 x double> @test_mm256_maskz_permutex_pd(i8 %a0, <4 x double> %a1) {
; X32-LABEL: test_mm256_maskz_permutex_pd: ; X32-LABEL: test_mm256_maskz_permutex_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi23: ; X32-NEXT: .Lcfi46:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -1097,6 +1143,8 @@ define <4 x double> @test_mm256_maskz_permutex_pd(i8 %a0, <4 x double> %a1) {
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0] ; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi47:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_maskz_permutex_pd: ; X64-LABEL: test_mm256_maskz_permutex_pd:
@ -1132,7 +1180,7 @@ define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %a0, i8 %a1, <2 x doub
; X32-LABEL: test_mm_mask_shuffle_pd: ; X32-LABEL: test_mm_mask_shuffle_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi24: ; X32-NEXT: .Lcfi48:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al ; X32-NEXT: andb $3, %al
@ -1141,6 +1189,8 @@ define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %a0, i8 %a1, <2 x doub
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1] ; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi49:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_shuffle_pd: ; X64-LABEL: test_mm_mask_shuffle_pd:
@ -1162,7 +1212,7 @@ define <2 x double> @test_mm_maskz_shuffle_pd(i8 %a0, <2 x double> %a1, <2 x dou
; X32-LABEL: test_mm_maskz_shuffle_pd: ; X32-LABEL: test_mm_maskz_shuffle_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi25: ; X32-NEXT: .Lcfi50:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al ; X32-NEXT: andb $3, %al
@ -1171,6 +1221,8 @@ define <2 x double> @test_mm_maskz_shuffle_pd(i8 %a0, <2 x double> %a1, <2 x dou
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] ; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi51:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_shuffle_pd: ; X64-LABEL: test_mm_maskz_shuffle_pd:
@ -1206,7 +1258,7 @@ define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %a0, i8 %a1, <4 x d
; X32-LABEL: test_mm256_mask_shuffle_pd: ; X32-LABEL: test_mm256_mask_shuffle_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi26: ; X32-NEXT: .Lcfi52:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -1215,6 +1267,8 @@ define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %a0, i8 %a1, <4 x d
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2] ; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi53:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_mask_shuffle_pd: ; X64-LABEL: test_mm256_mask_shuffle_pd:
@ -1236,7 +1290,7 @@ define <4 x double> @test_mm256_maskz_shuffle_pd(i8 %a0, <4 x double> %a1, <4 x
; X32-LABEL: test_mm256_maskz_shuffle_pd: ; X32-LABEL: test_mm256_maskz_shuffle_pd:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi27: ; X32-NEXT: .Lcfi54:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -1245,6 +1299,8 @@ define <4 x double> @test_mm256_maskz_shuffle_pd(i8 %a0, <4 x double> %a1, <4 x
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2] ; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi55:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm256_maskz_shuffle_pd: ; X64-LABEL: test_mm256_maskz_shuffle_pd:
@ -1280,7 +1336,7 @@ define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %a0, i8 %a1, <4 x float>
; X32-LABEL: test_mm_mask_shuffle_ps: ; X32-LABEL: test_mm_mask_shuffle_ps:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi28: ; X32-NEXT: .Lcfi56:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -1289,6 +1345,8 @@ define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %a0, i8 %a1, <4 x float>
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0] ; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi57:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_mask_shuffle_ps: ; X64-LABEL: test_mm_mask_shuffle_ps:
@ -1310,7 +1368,7 @@ define <4 x float> @test_mm_maskz_shuffle_ps(i8 %a0, <4 x float> %a1, <4 x float
; X32-LABEL: test_mm_maskz_shuffle_ps: ; X32-LABEL: test_mm_maskz_shuffle_ps:
; X32: # BB#0: ; X32: # BB#0:
; X32-NEXT: pushl %eax ; X32-NEXT: pushl %eax
; X32-NEXT: .Lcfi29: ; X32-NEXT: .Lcfi58:
; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al ; X32-NEXT: andb $15, %al
@ -1319,6 +1377,8 @@ define <4 x float> @test_mm_maskz_shuffle_ps(i8 %a0, <4 x float> %a1, <4 x float
; X32-NEXT: kmovw %eax, %k1 ; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0] ; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
; X32-NEXT: popl %eax ; X32-NEXT: popl %eax
; X32-NEXT: .Lcfi59:
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test_mm_maskz_shuffle_ps: ; X64-LABEL: test_mm_maskz_shuffle_ps:

View File

@ -13,6 +13,8 @@ define <8 x float> @_256_broadcast_ss_spill(float %x) {
; CHECK-NEXT: callq func_f32 ; CHECK-NEXT: callq func_f32
; CHECK-NEXT: vbroadcastss (%rsp), %ymm0 # 16-byte Folded Reload ; CHECK-NEXT: vbroadcastss (%rsp), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%a = fadd float %x, %x %a = fadd float %x, %x
call void @func_f32(float %a) call void @func_f32(float %a)
@ -25,13 +27,15 @@ define <4 x float> @_128_broadcast_ss_spill(float %x) {
; CHECK-LABEL: _128_broadcast_ss_spill: ; CHECK-LABEL: _128_broadcast_ss_spill:
; CHECK: # BB#0: ; CHECK: # BB#0:
; CHECK-NEXT: subq $24, %rsp ; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .Lcfi1: ; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq func_f32 ; CHECK-NEXT: callq func_f32
; CHECK-NEXT: vbroadcastss (%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-NEXT: vbroadcastss (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%a = fadd float %x, %x %a = fadd float %x, %x
call void @func_f32(float %a) call void @func_f32(float %a)
@ -45,13 +49,15 @@ define <4 x double> @_256_broadcast_sd_spill(double %x) {
; CHECK-LABEL: _256_broadcast_sd_spill: ; CHECK-LABEL: _256_broadcast_sd_spill:
; CHECK: # BB#0: ; CHECK: # BB#0:
; CHECK-NEXT: subq $24, %rsp ; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .Lcfi2: ; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: vaddsd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq func_f64 ; CHECK-NEXT: callq func_f64
; CHECK-NEXT: vbroadcastsd (%rsp), %ymm0 # 16-byte Folded Reload ; CHECK-NEXT: vbroadcastsd (%rsp), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .Lcfi5:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%a = fadd double %x, %x %a = fadd double %x, %x
call void @func_f64(double %a) call void @func_f64(double %a)

View File

@ -18,13 +18,19 @@ define i32 @my_get_xyz() {
; X32-NEXT: calll my_emutls_get_address@PLT ; X32-NEXT: calll my_emutls_get_address@PLT
; X32-NEXT: movl (%eax), %eax ; X32-NEXT: movl (%eax), %eax
; X32-NEXT: addl $8, %esp ; X32-NEXT: addl $8, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebx
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; X64-LABEL: my_get_xyz: ; X64-LABEL: my_get_xyz:
; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi ; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi
; X64-NEXT: callq my_emutls_get_address@PLT ; X64-NEXT: callq my_emutls_get_address@PLT
; X64-NEXT: movl (%rax), %eax ; X64-NEXT: movl (%rax), %eax
; X64-NEXT: popq %rcx ; X64-NEXT: popq %rcx
; X64-NEXT: :
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq ; X64-NEXT: retq
entry: entry:
@ -44,13 +50,19 @@ define i32 @f1() {
; X32-NEXT: calll __emutls_get_address@PLT ; X32-NEXT: calll __emutls_get_address@PLT
; X32-NEXT: movl (%eax), %eax ; X32-NEXT: movl (%eax), %eax
; X32-NEXT: addl $8, %esp ; X32-NEXT: addl $8, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebx
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; X64-LABEL: f1: ; X64-LABEL: f1:
; X64: leaq __emutls_v.i(%rip), %rdi ; X64: leaq __emutls_v.i(%rip), %rdi
; X64-NEXT: callq __emutls_get_address@PLT ; X64-NEXT: callq __emutls_get_address@PLT
; X64-NEXT: movl (%rax), %eax ; X64-NEXT: movl (%rax), %eax
; X64-NEXT: popq %rcx ; X64-NEXT: popq %rcx
; X64-NEXT: :
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq ; X64-NEXT: retq
entry: entry:

View File

@ -16,12 +16,16 @@ define i32 @my_get_xyz() {
; X32-NEXT: calll my_emutls_get_address ; X32-NEXT: calll my_emutls_get_address
; X32-NEXT: movl (%eax), %eax ; X32-NEXT: movl (%eax), %eax
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; X64-LABEL: my_get_xyz: ; X64-LABEL: my_get_xyz:
; X64: movl $my_emutls_v_xyz, %edi ; X64: movl $my_emutls_v_xyz, %edi
; X64-NEXT: callq my_emutls_get_address ; X64-NEXT: callq my_emutls_get_address
; X64-NEXT: movl (%rax), %eax ; X64-NEXT: movl (%rax), %eax
; X64-NEXT: popq %rcx ; X64-NEXT: popq %rcx
; X64-NEXT: :
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq ; X64-NEXT: retq
entry: entry:
@ -45,12 +49,16 @@ define i32 @f1() {
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: movl (%eax), %eax ; X32-NEXT: movl (%eax), %eax
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; X64-LABEL: f1: ; X64-LABEL: f1:
; X64: movl $__emutls_v.i1, %edi ; X64: movl $__emutls_v.i1, %edi
; X64-NEXT: callq __emutls_get_address ; X64-NEXT: callq __emutls_get_address
; X64-NEXT: movl (%rax), %eax ; X64-NEXT: movl (%rax), %eax
; X64-NEXT: popq %rcx ; X64-NEXT: popq %rcx
; X64-NEXT: :
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq ; X64-NEXT: retq
entry: entry:
@ -63,11 +71,15 @@ define i32* @f2() {
; X32: movl $__emutls_v.i1, (%esp) ; X32: movl $__emutls_v.i1, (%esp)
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
; X64-LABEL: f2: ; X64-LABEL: f2:
; X64: movl $__emutls_v.i1, %edi ; X64: movl $__emutls_v.i1, %edi
; X64-NEXT: callq __emutls_get_address ; X64-NEXT: callq __emutls_get_address
; X64-NEXT: popq %rcx ; X64-NEXT: popq %rcx
; X64-NEXT: :
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq ; X64-NEXT: retq
entry: entry:
@ -92,6 +104,8 @@ define i32* @f4() {
; X32: movl $__emutls_v.i2, (%esp) ; X32: movl $__emutls_v.i2, (%esp)
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -116,6 +130,8 @@ define i32* @f6() {
; X32: movl $__emutls_v.i3, (%esp) ; X32: movl $__emutls_v.i3, (%esp)
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -128,6 +144,8 @@ define i32 @f7() {
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: movl (%eax), %eax ; X32-NEXT: movl (%eax), %eax
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -140,6 +158,8 @@ define i32* @f8() {
; X32: movl $__emutls_v.i4, (%esp) ; X32: movl $__emutls_v.i4, (%esp)
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -152,6 +172,8 @@ define i32 @f9() {
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: movl (%eax), %eax ; X32-NEXT: movl (%eax), %eax
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -164,6 +186,8 @@ define i32* @f10() {
; X32: movl $__emutls_v.i5, (%esp) ; X32: movl $__emutls_v.i5, (%esp)
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -176,6 +200,8 @@ define i16 @f11() {
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: movzwl (%eax), %eax ; X32-NEXT: movzwl (%eax), %eax
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -189,6 +215,8 @@ define i32 @f12() {
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: movswl (%eax), %eax ; X32-NEXT: movswl (%eax), %eax
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -203,6 +231,8 @@ define i8 @f13() {
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: movb (%eax), %al ; X32-NEXT: movb (%eax), %al
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:
@ -216,6 +246,8 @@ define i32 @f14() {
; X32-NEXT: calll __emutls_get_address ; X32-NEXT: calll __emutls_get_address
; X32-NEXT: movsbl (%eax), %eax ; X32-NEXT: movsbl (%eax), %eax
; X32-NEXT: addl $12, %esp ; X32-NEXT: addl $12, %esp
; X32-NEXT: :
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl ; X32-NEXT: retl
entry: entry:

View File

@ -0,0 +1,44 @@
; RUN: llc -O0 %s -o - | FileCheck %s
; ModuleID = 'epilogue-cfi-fp.c'
source_filename = "epilogue-cfi-fp.c"
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i686-pc-linux"
; Function Attrs: noinline nounwind
define i32 @foo(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m) #0 {
; CHECK-LABEL: foo:
; CHECK: popl %ebp
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa %esp, 4
; CHECK-NEXT: retl
entry:
%i.addr = alloca i32, align 4
%j.addr = alloca i32, align 4
%k.addr = alloca i32, align 4
%l.addr = alloca i32, align 4
%m.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
store i32 %j, i32* %j.addr, align 4
store i32 %k, i32* %k.addr, align 4
store i32 %l, i32* %l.addr, align 4
store i32 %m, i32* %m.addr, align 4
ret i32 0
}
attributes #0 = { "no-frame-pointer-elim"="true" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6, !7}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (http://llvm.org/git/clang.git 3f8116e6a2815b1d5f3491493938d0c63c9f42c9) (http://llvm.org/git/llvm.git 4fde77f8f1a8e4482e69b6a7484bc7d1b99b3c0a)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "epilogue-cfi-fp.c", directory: "epilogue-dwarf/test")
!2 = !{}
!3 = !{i32 1, !"NumRegisterParameters", i32 0}
!4 = !{i32 2, !"Dwarf Version", i32 4}
!5 = !{i32 2, !"Debug Info Version", i32 3}
!6 = !{i32 1, !"wchar_size", i32 4}
!7 = !{i32 7, !"PIC Level", i32 2}

View File

@ -0,0 +1,50 @@
; RUN: llc -O0 < %s | FileCheck %s
; ModuleID = 'epilogue-cfi-no-fp.c'
source_filename = "epilogue-cfi-no-fp.c"
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i686-pc-linux"
; Function Attrs: noinline nounwind
define i32 @foo(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m) {
; CHECK-LABEL: foo:
; CHECK: addl $20, %esp
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popl %esi
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: popl %edi
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
entry:
%i.addr = alloca i32, align 4
%j.addr = alloca i32, align 4
%k.addr = alloca i32, align 4
%l.addr = alloca i32, align 4
%m.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
store i32 %j, i32* %j.addr, align 4
store i32 %k, i32* %k.addr, align 4
store i32 %l, i32* %l.addr, align 4
store i32 %m, i32* %m.addr, align 4
ret i32 0
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6, !7}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (http://llvm.org/git/clang.git 3f8116e6a2815b1d5f3491493938d0c63c9f42c9) (http://llvm.org/git/llvm.git 4fde77f8f1a8e4482e69b6a7484bc7d1b99b3c0a)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "epilogue-cfi-no-fp.c", directory: "epilogue-dwarf/test")
!2 = !{}
!3 = !{i32 1, !"NumRegisterParameters", i32 0}
!4 = !{i32 2, !"Dwarf Version", i32 4}
!5 = !{i32 2, !"Debug Info Version", i32 3}
!6 = !{i32 1, !"wchar_size", i32 4}
!7 = !{i32 7, !"PIC Level", i32 2}

View File

@ -376,6 +376,8 @@ define <4 x double> @test_store_4xf64(<4 x double>* nocapture %addr, <4 x double
; SSE64-NEXT: movupd %xmm0, (%eax) ; SSE64-NEXT: movupd %xmm0, (%eax)
; SSE64-NEXT: movupd %xmm1, 16(%eax) ; SSE64-NEXT: movupd %xmm1, 16(%eax)
; SSE64-NEXT: addl $12, %esp ; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .Lcfi1:
; SSE64-NEXT: .cfi_def_cfa_offset 4
; SSE64-NEXT: retl ; SSE64-NEXT: retl
; ;
; AVX32-LABEL: test_store_4xf64: ; AVX32-LABEL: test_store_4xf64:
@ -407,7 +409,7 @@ define <4 x double> @test_store_4xf64_aligned(<4 x double>* nocapture %addr, <4
; SSE64-LABEL: test_store_4xf64_aligned: ; SSE64-LABEL: test_store_4xf64_aligned:
; SSE64: # BB#0: ; SSE64: # BB#0:
; SSE64-NEXT: subl $12, %esp ; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .Lcfi1: ; SSE64-NEXT: .Lcfi2:
; SSE64-NEXT: .cfi_def_cfa_offset 16 ; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1 ; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
@ -415,6 +417,8 @@ define <4 x double> @test_store_4xf64_aligned(<4 x double>* nocapture %addr, <4
; SSE64-NEXT: movapd %xmm0, (%eax) ; SSE64-NEXT: movapd %xmm0, (%eax)
; SSE64-NEXT: movapd %xmm1, 16(%eax) ; SSE64-NEXT: movapd %xmm1, 16(%eax)
; SSE64-NEXT: addl $12, %esp ; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .Lcfi3:
; SSE64-NEXT: .cfi_def_cfa_offset 4
; SSE64-NEXT: retl ; SSE64-NEXT: retl
; ;
; AVX32-LABEL: test_store_4xf64_aligned: ; AVX32-LABEL: test_store_4xf64_aligned:
@ -446,7 +450,7 @@ define <16 x i32> @test_store_16xi32(<16 x i32>* nocapture %addr, <16 x i32> %va
; SSE64-LABEL: test_store_16xi32: ; SSE64-LABEL: test_store_16xi32:
; SSE64: # BB#0: ; SSE64: # BB#0:
; SSE64-NEXT: subl $12, %esp ; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .Lcfi2: ; SSE64-NEXT: .Lcfi4:
; SSE64-NEXT: .cfi_def_cfa_offset 16 ; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movaps {{[0-9]+}}(%esp), %xmm3 ; SSE64-NEXT: movaps {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
@ -455,6 +459,8 @@ define <16 x i32> @test_store_16xi32(<16 x i32>* nocapture %addr, <16 x i32> %va
; SSE64-NEXT: movups %xmm2, 32(%eax) ; SSE64-NEXT: movups %xmm2, 32(%eax)
; SSE64-NEXT: movups %xmm3, 48(%eax) ; SSE64-NEXT: movups %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp ; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .Lcfi5:
; SSE64-NEXT: .cfi_def_cfa_offset 4
; SSE64-NEXT: retl ; SSE64-NEXT: retl
; ;
; AVXONLY32-LABEL: test_store_16xi32: ; AVXONLY32-LABEL: test_store_16xi32:
@ -496,7 +502,7 @@ define <16 x i32> @test_store_16xi32_aligned(<16 x i32>* nocapture %addr, <16 x
; SSE64-LABEL: test_store_16xi32_aligned: ; SSE64-LABEL: test_store_16xi32_aligned:
; SSE64: # BB#0: ; SSE64: # BB#0:
; SSE64-NEXT: subl $12, %esp ; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .Lcfi3: ; SSE64-NEXT: .Lcfi6:
; SSE64-NEXT: .cfi_def_cfa_offset 16 ; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movaps {{[0-9]+}}(%esp), %xmm3 ; SSE64-NEXT: movaps {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
@ -505,6 +511,8 @@ define <16 x i32> @test_store_16xi32_aligned(<16 x i32>* nocapture %addr, <16 x
; SSE64-NEXT: movaps %xmm2, 32(%eax) ; SSE64-NEXT: movaps %xmm2, 32(%eax)
; SSE64-NEXT: movaps %xmm3, 48(%eax) ; SSE64-NEXT: movaps %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp ; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .Lcfi7:
; SSE64-NEXT: .cfi_def_cfa_offset 4
; SSE64-NEXT: retl ; SSE64-NEXT: retl
; ;
; AVXONLY32-LABEL: test_store_16xi32_aligned: ; AVXONLY32-LABEL: test_store_16xi32_aligned:
@ -546,7 +554,7 @@ define <16 x float> @test_store_16xf32(<16 x float>* nocapture %addr, <16 x floa
; SSE64-LABEL: test_store_16xf32: ; SSE64-LABEL: test_store_16xf32:
; SSE64: # BB#0: ; SSE64: # BB#0:
; SSE64-NEXT: subl $12, %esp ; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .Lcfi4: ; SSE64-NEXT: .Lcfi8:
; SSE64-NEXT: .cfi_def_cfa_offset 16 ; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movaps {{[0-9]+}}(%esp), %xmm3 ; SSE64-NEXT: movaps {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
@ -555,6 +563,8 @@ define <16 x float> @test_store_16xf32(<16 x float>* nocapture %addr, <16 x floa
; SSE64-NEXT: movups %xmm2, 32(%eax) ; SSE64-NEXT: movups %xmm2, 32(%eax)
; SSE64-NEXT: movups %xmm3, 48(%eax) ; SSE64-NEXT: movups %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp ; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .Lcfi9:
; SSE64-NEXT: .cfi_def_cfa_offset 4
; SSE64-NEXT: retl ; SSE64-NEXT: retl
; ;
; AVXONLY32-LABEL: test_store_16xf32: ; AVXONLY32-LABEL: test_store_16xf32:
@ -596,7 +606,7 @@ define <16 x float> @test_store_16xf32_aligned(<16 x float>* nocapture %addr, <1
; SSE64-LABEL: test_store_16xf32_aligned: ; SSE64-LABEL: test_store_16xf32_aligned:
; SSE64: # BB#0: ; SSE64: # BB#0:
; SSE64-NEXT: subl $12, %esp ; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .Lcfi5: ; SSE64-NEXT: .Lcfi10:
; SSE64-NEXT: .cfi_def_cfa_offset 16 ; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movaps {{[0-9]+}}(%esp), %xmm3 ; SSE64-NEXT: movaps {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
@ -605,6 +615,8 @@ define <16 x float> @test_store_16xf32_aligned(<16 x float>* nocapture %addr, <1
; SSE64-NEXT: movaps %xmm2, 32(%eax) ; SSE64-NEXT: movaps %xmm2, 32(%eax)
; SSE64-NEXT: movaps %xmm3, 48(%eax) ; SSE64-NEXT: movaps %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp ; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .Lcfi11:
; SSE64-NEXT: .cfi_def_cfa_offset 4
; SSE64-NEXT: retl ; SSE64-NEXT: retl
; ;
; AVXONLY32-LABEL: test_store_16xf32_aligned: ; AVXONLY32-LABEL: test_store_16xf32_aligned:
@ -650,7 +662,7 @@ define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double
; SSE64-LABEL: test_store_8xf64: ; SSE64-LABEL: test_store_8xf64:
; SSE64: # BB#0: ; SSE64: # BB#0:
; SSE64-NEXT: subl $12, %esp ; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .Lcfi6: ; SSE64-NEXT: .Lcfi12:
; SSE64-NEXT: .cfi_def_cfa_offset 16 ; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3 ; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
@ -663,6 +675,8 @@ define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double
; SSE64-NEXT: movupd %xmm2, 32(%eax) ; SSE64-NEXT: movupd %xmm2, 32(%eax)
; SSE64-NEXT: movupd %xmm3, 48(%eax) ; SSE64-NEXT: movupd %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp ; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .Lcfi13:
; SSE64-NEXT: .cfi_def_cfa_offset 4
; SSE64-NEXT: retl ; SSE64-NEXT: retl
; ;
; AVXONLY32-LABEL: test_store_8xf64: ; AVXONLY32-LABEL: test_store_8xf64:
@ -692,6 +706,8 @@ define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double
; AVXONLY64-NEXT: vmovupd %ymm1, 32(%eax) ; AVXONLY64-NEXT: vmovupd %ymm1, 32(%eax)
; AVXONLY64-NEXT: movl %ebp, %esp ; AVXONLY64-NEXT: movl %ebp, %esp
; AVXONLY64-NEXT: popl %ebp ; AVXONLY64-NEXT: popl %ebp
; AVXONLY64-NEXT: .Lcfi3:
; AVXONLY64-NEXT: .cfi_def_cfa %esp, 4
; AVXONLY64-NEXT: retl ; AVXONLY64-NEXT: retl
; ;
; AVX51232-LABEL: test_store_8xf64: ; AVX51232-LABEL: test_store_8xf64:
@ -727,7 +743,7 @@ define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8
; SSE64-LABEL: test_store_8xf64_aligned: ; SSE64-LABEL: test_store_8xf64_aligned:
; SSE64: # BB#0: ; SSE64: # BB#0:
; SSE64-NEXT: subl $12, %esp ; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .Lcfi7: ; SSE64-NEXT: .Lcfi14:
; SSE64-NEXT: .cfi_def_cfa_offset 16 ; SSE64-NEXT: .cfi_def_cfa_offset 16
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3 ; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
@ -740,6 +756,8 @@ define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8
; SSE64-NEXT: movapd %xmm2, 32(%eax) ; SSE64-NEXT: movapd %xmm2, 32(%eax)
; SSE64-NEXT: movapd %xmm3, 48(%eax) ; SSE64-NEXT: movapd %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp ; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .Lcfi15:
; SSE64-NEXT: .cfi_def_cfa_offset 4
; SSE64-NEXT: retl ; SSE64-NEXT: retl
; ;
; AVXONLY32-LABEL: test_store_8xf64_aligned: ; AVXONLY32-LABEL: test_store_8xf64_aligned:
@ -753,12 +771,12 @@ define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8
; AVXONLY64-LABEL: test_store_8xf64_aligned: ; AVXONLY64-LABEL: test_store_8xf64_aligned:
; AVXONLY64: # BB#0: ; AVXONLY64: # BB#0:
; AVXONLY64-NEXT: pushl %ebp ; AVXONLY64-NEXT: pushl %ebp
; AVXONLY64-NEXT: .Lcfi3:
; AVXONLY64-NEXT: .cfi_def_cfa_offset 8
; AVXONLY64-NEXT: .Lcfi4: ; AVXONLY64-NEXT: .Lcfi4:
; AVXONLY64-NEXT: .cfi_def_cfa_offset 8
; AVXONLY64-NEXT: .Lcfi5:
; AVXONLY64-NEXT: .cfi_offset %ebp, -8 ; AVXONLY64-NEXT: .cfi_offset %ebp, -8
; AVXONLY64-NEXT: movl %esp, %ebp ; AVXONLY64-NEXT: movl %esp, %ebp
; AVXONLY64-NEXT: .Lcfi5: ; AVXONLY64-NEXT: .Lcfi6:
; AVXONLY64-NEXT: .cfi_def_cfa_register %ebp ; AVXONLY64-NEXT: .cfi_def_cfa_register %ebp
; AVXONLY64-NEXT: andl $-32, %esp ; AVXONLY64-NEXT: andl $-32, %esp
; AVXONLY64-NEXT: subl $32, %esp ; AVXONLY64-NEXT: subl $32, %esp
@ -769,6 +787,8 @@ define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8
; AVXONLY64-NEXT: vmovapd %ymm1, 32(%eax) ; AVXONLY64-NEXT: vmovapd %ymm1, 32(%eax)
; AVXONLY64-NEXT: movl %ebp, %esp ; AVXONLY64-NEXT: movl %ebp, %esp
; AVXONLY64-NEXT: popl %ebp ; AVXONLY64-NEXT: popl %ebp
; AVXONLY64-NEXT: .Lcfi7:
; AVXONLY64-NEXT: .cfi_def_cfa %esp, 4
; AVXONLY64-NEXT: retl ; AVXONLY64-NEXT: retl
; ;
; AVX51232-LABEL: test_store_8xf64_aligned: ; AVX51232-LABEL: test_store_8xf64_aligned:

View File

@ -18,9 +18,14 @@ entry:
} }
; CHECK-LABEL: noDebug ; CHECK-LABEL: noDebug
; CHECK: addq $24, %rsp ; CHECK: addq $16, %rsp
; CHECK: addq $8, %rsp
; CHECK: popq %rbx ; CHECK: popq %rbx
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r14
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
@ -41,9 +46,14 @@ entry:
; CHECK-LABEL: withDebug ; CHECK-LABEL: withDebug
; CHECK: #DEBUG_VALUE: test:j <- %RBX ; CHECK: #DEBUG_VALUE: test:j <- %RBX
; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: addq $16, %rsp
; CHECK: addq $8, %rsp
; CHECK: popq %rbx ; CHECK: popq %rbx
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r14
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)

View File

@ -9,6 +9,8 @@ define i64 @fn1NoDebug(i64 %a) {
; CHECK-LABEL: fn1NoDebug ; CHECK-LABEL: fn1NoDebug
; CHECK: popq %rcx ; CHECK: popq %rcx
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: ret ; CHECK-NEXT: ret
define i64 @fn1WithDebug(i64 %a) !dbg !4 { define i64 @fn1WithDebug(i64 %a) !dbg !4 {
@ -19,6 +21,8 @@ define i64 @fn1WithDebug(i64 %a) !dbg !4 {
; CHECK-LABEL: fn1WithDebug ; CHECK-LABEL: fn1WithDebug
; CHECK: popq %rcx ; CHECK: popq %rcx
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: ret ; CHECK-NEXT: ret
%struct.Buffer = type { i8, [63 x i8] } %struct.Buffer = type { i8, [63 x i8] }
@ -33,6 +37,8 @@ define void @fn2NoDebug(%struct.Buffer* byval align 64 %p1) {
; CHECK-NOT: sub ; CHECK-NOT: sub
; CHECK: mov ; CHECK: mov
; CHECK-NEXT: pop ; CHECK-NEXT: pop
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: ret ; CHECK-NEXT: ret
define void @fn2WithDebug(%struct.Buffer* byval align 64 %p1) !dbg !8 { define void @fn2WithDebug(%struct.Buffer* byval align 64 %p1) !dbg !8 {
@ -46,6 +52,8 @@ define void @fn2WithDebug(%struct.Buffer* byval align 64 %p1) !dbg !8 {
; CHECK-NOT: sub ; CHECK-NOT: sub
; CHECK: mov ; CHECK: mov
; CHECK-NEXT: pop ; CHECK-NEXT: pop
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: ret ; CHECK-NEXT: ret
declare i64 @fn(i64, i64) declare i64 @fn(i64, i64)

View File

@ -736,11 +736,23 @@ define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) {
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE3-NEXT: popq %rbx ; SSE3-NEXT: popq %rbx
; SSE3-NEXT: .Lcfi12:
; SSE3-NEXT: .cfi_def_cfa_offset 48
; SSE3-NEXT: popq %r12 ; SSE3-NEXT: popq %r12
; SSE3-NEXT: .Lcfi13:
; SSE3-NEXT: .cfi_def_cfa_offset 40
; SSE3-NEXT: popq %r13 ; SSE3-NEXT: popq %r13
; SSE3-NEXT: .Lcfi14:
; SSE3-NEXT: .cfi_def_cfa_offset 32
; SSE3-NEXT: popq %r14 ; SSE3-NEXT: popq %r14
; SSE3-NEXT: .Lcfi15:
; SSE3-NEXT: .cfi_def_cfa_offset 24
; SSE3-NEXT: popq %r15 ; SSE3-NEXT: popq %r15
; SSE3-NEXT: .Lcfi16:
; SSE3-NEXT: .cfi_def_cfa_offset 16
; SSE3-NEXT: popq %rbp ; SSE3-NEXT: popq %rbp
; SSE3-NEXT: .Lcfi17:
; SSE3-NEXT: .cfi_def_cfa_offset 8
; SSE3-NEXT: retq ; SSE3-NEXT: retq
; ;
; SSSE3-LABEL: avx2_vphadd_w_test: ; SSSE3-LABEL: avx2_vphadd_w_test:
@ -1263,34 +1275,34 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
; SSE3-LABEL: avx2_hadd_w: ; SSE3-LABEL: avx2_hadd_w:
; SSE3: # BB#0: ; SSE3: # BB#0:
; SSE3-NEXT: pushq %rbp ; SSE3-NEXT: pushq %rbp
; SSE3-NEXT: .Lcfi12: ; SSE3-NEXT: .Lcfi18:
; SSE3-NEXT: .cfi_def_cfa_offset 16 ; SSE3-NEXT: .cfi_def_cfa_offset 16
; SSE3-NEXT: pushq %r15 ; SSE3-NEXT: pushq %r15
; SSE3-NEXT: .Lcfi13: ; SSE3-NEXT: .Lcfi19:
; SSE3-NEXT: .cfi_def_cfa_offset 24 ; SSE3-NEXT: .cfi_def_cfa_offset 24
; SSE3-NEXT: pushq %r14 ; SSE3-NEXT: pushq %r14
; SSE3-NEXT: .Lcfi14: ; SSE3-NEXT: .Lcfi20:
; SSE3-NEXT: .cfi_def_cfa_offset 32 ; SSE3-NEXT: .cfi_def_cfa_offset 32
; SSE3-NEXT: pushq %r13 ; SSE3-NEXT: pushq %r13
; SSE3-NEXT: .Lcfi15: ; SSE3-NEXT: .Lcfi21:
; SSE3-NEXT: .cfi_def_cfa_offset 40 ; SSE3-NEXT: .cfi_def_cfa_offset 40
; SSE3-NEXT: pushq %r12 ; SSE3-NEXT: pushq %r12
; SSE3-NEXT: .Lcfi16: ; SSE3-NEXT: .Lcfi22:
; SSE3-NEXT: .cfi_def_cfa_offset 48 ; SSE3-NEXT: .cfi_def_cfa_offset 48
; SSE3-NEXT: pushq %rbx ; SSE3-NEXT: pushq %rbx
; SSE3-NEXT: .Lcfi17:
; SSE3-NEXT: .cfi_def_cfa_offset 56
; SSE3-NEXT: .Lcfi18:
; SSE3-NEXT: .cfi_offset %rbx, -56
; SSE3-NEXT: .Lcfi19:
; SSE3-NEXT: .cfi_offset %r12, -48
; SSE3-NEXT: .Lcfi20:
; SSE3-NEXT: .cfi_offset %r13, -40
; SSE3-NEXT: .Lcfi21:
; SSE3-NEXT: .cfi_offset %r14, -32
; SSE3-NEXT: .Lcfi22:
; SSE3-NEXT: .cfi_offset %r15, -24
; SSE3-NEXT: .Lcfi23: ; SSE3-NEXT: .Lcfi23:
; SSE3-NEXT: .cfi_def_cfa_offset 56
; SSE3-NEXT: .Lcfi24:
; SSE3-NEXT: .cfi_offset %rbx, -56
; SSE3-NEXT: .Lcfi25:
; SSE3-NEXT: .cfi_offset %r12, -48
; SSE3-NEXT: .Lcfi26:
; SSE3-NEXT: .cfi_offset %r13, -40
; SSE3-NEXT: .Lcfi27:
; SSE3-NEXT: .cfi_offset %r14, -32
; SSE3-NEXT: .Lcfi28:
; SSE3-NEXT: .cfi_offset %r15, -24
; SSE3-NEXT: .Lcfi29:
; SSE3-NEXT: .cfi_offset %rbp, -16 ; SSE3-NEXT: .cfi_offset %rbp, -16
; SSE3-NEXT: movd %xmm0, %eax ; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: pextrw $1, %xmm0, %r10d ; SSE3-NEXT: pextrw $1, %xmm0, %r10d
@ -1375,11 +1387,23 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] ; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE3-NEXT: popq %rbx ; SSE3-NEXT: popq %rbx
; SSE3-NEXT: .Lcfi30:
; SSE3-NEXT: .cfi_def_cfa_offset 48
; SSE3-NEXT: popq %r12 ; SSE3-NEXT: popq %r12
; SSE3-NEXT: .Lcfi31:
; SSE3-NEXT: .cfi_def_cfa_offset 40
; SSE3-NEXT: popq %r13 ; SSE3-NEXT: popq %r13
; SSE3-NEXT: .Lcfi32:
; SSE3-NEXT: .cfi_def_cfa_offset 32
; SSE3-NEXT: popq %r14 ; SSE3-NEXT: popq %r14
; SSE3-NEXT: .Lcfi33:
; SSE3-NEXT: .cfi_def_cfa_offset 24
; SSE3-NEXT: popq %r15 ; SSE3-NEXT: popq %r15
; SSE3-NEXT: .Lcfi34:
; SSE3-NEXT: .cfi_def_cfa_offset 16
; SSE3-NEXT: popq %rbp ; SSE3-NEXT: popq %rbp
; SSE3-NEXT: .Lcfi35:
; SSE3-NEXT: .cfi_def_cfa_offset 8
; SSE3-NEXT: retq ; SSE3-NEXT: retq
; ;
; SSSE3-LABEL: avx2_hadd_w: ; SSSE3-LABEL: avx2_hadd_w:

View File

@ -91,6 +91,8 @@ define cc 11 { i64, i64, i64 } @tailcaller(i64 %hp, i64 %p) #0 {
; CHECK-NEXT: movl $47, %ecx ; CHECK-NEXT: movl $47, %ecx
; CHECK-NEXT: movl $63, %r8d ; CHECK-NEXT: movl $63, %r8d
; CHECK-NEXT: popq %rax ; CHECK-NEXT: popq %rax
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: jmp tailcallee ; CHECK-NEXT: jmp tailcallee
%ret = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %hp, i64 %p, i64 15, %ret = tail call cc11 { i64, i64, i64 } @tailcallee(i64 %hp, i64 %p, i64 15,
i64 31, i64 47, i64 63, i64 79) #1 i64 31, i64 47, i64 63, i64 79) #1

View File

@ -309,6 +309,8 @@ define i64 @test5(i64 %a) {
; X86-NEXT: subl %ecx, %edx ; X86-NEXT: subl %ecx, %edx
; X86-NEXT: subl %esi, %edx ; X86-NEXT: subl %esi, %edx
; X86-NEXT: popl %esi ; X86-NEXT: popl %esi
; X86-NEXT: .Lcfi2:
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl ; X86-NEXT: retl
entry: entry:
%tmp3 = mul i64 %a, -31 %tmp3 = mul i64 %a, -31
@ -351,9 +353,9 @@ define i64 @test7(i64 %a) {
; X86-LABEL: test7: ; X86-LABEL: test7:
; X86: # BB#0: # %entry ; X86: # BB#0: # %entry
; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi
; X86-NEXT: .Lcfi2:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .Lcfi3: ; X86-NEXT: .Lcfi3:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .Lcfi4:
; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@ -366,6 +368,8 @@ define i64 @test7(i64 %a) {
; X86-NEXT: subl %ecx, %edx ; X86-NEXT: subl %ecx, %edx
; X86-NEXT: subl %esi, %edx ; X86-NEXT: subl %esi, %edx
; X86-NEXT: popl %esi ; X86-NEXT: popl %esi
; X86-NEXT: .Lcfi5:
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl ; X86-NEXT: retl
entry: entry:
%tmp3 = mul i64 %a, -33 %tmp3 = mul i64 %a, -33
@ -382,9 +386,9 @@ define i64 @testOverflow(i64 %a) {
; X86-LABEL: testOverflow: ; X86-LABEL: testOverflow:
; X86: # BB#0: # %entry ; X86: # BB#0: # %entry
; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi
; X86-NEXT: .Lcfi4: ; X86-NEXT: .Lcfi6:
; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .Lcfi5: ; X86-NEXT: .Lcfi7:
; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-1, %edx ; X86-NEXT: movl $-1, %edx
@ -396,6 +400,8 @@ define i64 @testOverflow(i64 %a) {
; X86-NEXT: addl %esi, %edx ; X86-NEXT: addl %esi, %edx
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx ; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
; X86-NEXT: popl %esi ; X86-NEXT: popl %esi
; X86-NEXT: .Lcfi8:
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl ; X86-NEXT: retl
entry: entry:
%tmp3 = mul i64 %a, 9223372036854775807 %tmp3 = mul i64 %a, 9223372036854775807

View File

@ -125,9 +125,17 @@ define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) {
; CHECK-NEXT: movl %esi, 4(%eax) ; CHECK-NEXT: movl %esi, 4(%eax)
; CHECK-NEXT: movl %edi, (%eax) ; CHECK-NEXT: movl %edi, (%eax)
; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %esi
; CHECK-NEXT: .Lcfi8:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popl %edi ; CHECK-NEXT: popl %edi
; CHECK-NEXT: .Lcfi9:
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: popl %ebx ; CHECK-NEXT: popl %ebx
; CHECK-NEXT: .Lcfi10:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: popl %ebp ; CHECK-NEXT: popl %ebp
; CHECK-NEXT: .Lcfi11:
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl $4 ; CHECK-NEXT: retl $4
%shl = shl <2 x i64> %A, %B %shl = shl <2 x i64> %A, %B
ret <2 x i64> %shl ret <2 x i64> %shl
@ -138,12 +146,12 @@ define i32 @test6() {
; CHECK-LABEL: test6: ; CHECK-LABEL: test6:
; CHECK: # BB#0: ; CHECK: # BB#0:
; CHECK-NEXT: pushl %ebp ; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: .Lcfi8: ; CHECK-NEXT: .Lcfi12:
; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi9: ; CHECK-NEXT: .Lcfi13:
; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: .cfi_offset %ebp, -8
; CHECK-NEXT: movl %esp, %ebp ; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: .Lcfi10: ; CHECK-NEXT: .Lcfi14:
; CHECK-NEXT: .cfi_def_cfa_register %ebp ; CHECK-NEXT: .cfi_def_cfa_register %ebp
; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: subl $16, %esp
@ -172,6 +180,8 @@ define i32 @test6() {
; CHECK-NEXT: .LBB5_4: # %if.then ; CHECK-NEXT: .LBB5_4: # %if.then
; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp ; CHECK-NEXT: popl %ebp
; CHECK-NEXT: .Lcfi15:
; CHECK-NEXT: .cfi_def_cfa %esp, 4
; CHECK-NEXT: retl ; CHECK-NEXT: retl
%x = alloca i32, align 4 %x = alloca i32, align 4
%t = alloca i64, align 8 %t = alloca i64, align 8

View File

@ -378,6 +378,8 @@ define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax ; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl ; CHECK-NEXT: retl
; ;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses: ; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
@ -482,9 +484,9 @@ define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between: ; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0: ; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi ; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi3: ; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_offset %esi, -8 ; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
@ -500,6 +502,8 @@ define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-NEXT: movzbl 3(%ecx), %eax ; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax ; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %esi
; CHECK-NEXT: .Lcfi5:
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl ; CHECK-NEXT: retl
; ;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between: ; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:

View File

@ -1764,6 +1764,8 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0 ; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: movl %ebp, %esp ; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp ; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .Lcfi3:
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
; KNL_32-NEXT: retl ; KNL_32-NEXT: retl
; ;
; SKX-LABEL: test_gather_16i64: ; SKX-LABEL: test_gather_16i64:
@ -1781,12 +1783,12 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i
; SKX_32-LABEL: test_gather_16i64: ; SKX_32-LABEL: test_gather_16i64:
; SKX_32: # BB#0: ; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp ; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .Lcfi1:
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .Lcfi2: ; SKX_32-NEXT: .Lcfi2:
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .Lcfi3:
; SKX_32-NEXT: .cfi_offset %ebp, -8 ; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp ; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .Lcfi3: ; SKX_32-NEXT: .Lcfi4:
; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp ; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp ; SKX_32-NEXT: subl $64, %esp
@ -1801,6 +1803,8 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i
; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0 ; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .Lcfi5:
; SKX_32-NEXT: .cfi_def_cfa %esp, 4
; SKX_32-NEXT: retl ; SKX_32-NEXT: retl
%res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0) %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
ret <16 x i64> %res ret <16 x i64> %res
@ -1867,12 +1871,12 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
; KNL_32-LABEL: test_gather_16f64: ; KNL_32-LABEL: test_gather_16f64:
; KNL_32: # BB#0: ; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp ; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .Lcfi3:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .Lcfi4: ; KNL_32-NEXT: .Lcfi4:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .Lcfi5:
; KNL_32-NEXT: .cfi_offset %ebp, -8 ; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp ; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .Lcfi5: ; KNL_32-NEXT: .Lcfi6:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp ; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp ; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp ; KNL_32-NEXT: subl $64, %esp
@ -1887,6 +1891,8 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
; KNL_32-NEXT: vmovapd %zmm2, %zmm0 ; KNL_32-NEXT: vmovapd %zmm2, %zmm0
; KNL_32-NEXT: movl %ebp, %esp ; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp ; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .Lcfi7:
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
; KNL_32-NEXT: retl ; KNL_32-NEXT: retl
; ;
; SKX-LABEL: test_gather_16f64: ; SKX-LABEL: test_gather_16f64:
@ -1904,12 +1910,12 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
; SKX_32-LABEL: test_gather_16f64: ; SKX_32-LABEL: test_gather_16f64:
; SKX_32: # BB#0: ; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp ; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .Lcfi4: ; SKX_32-NEXT: .Lcfi6:
; SKX_32-NEXT: .cfi_def_cfa_offset 8 ; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .Lcfi5: ; SKX_32-NEXT: .Lcfi7:
; SKX_32-NEXT: .cfi_offset %ebp, -8 ; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp ; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .Lcfi6: ; SKX_32-NEXT: .Lcfi8:
; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp ; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp ; SKX_32-NEXT: subl $64, %esp
@ -1924,6 +1930,8 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
; SKX_32-NEXT: vmovapd %zmm2, %zmm0 ; SKX_32-NEXT: vmovapd %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .Lcfi9:
; SKX_32-NEXT: .cfi_def_cfa %esp, 4
; SKX_32-NEXT: retl ; SKX_32-NEXT: retl
%res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0) %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
ret <16 x double> %res ret <16 x double> %res
@ -1989,12 +1997,12 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
; KNL_32-LABEL: test_scatter_16i64: ; KNL_32-LABEL: test_scatter_16i64:
; KNL_32: # BB#0: ; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp ; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .Lcfi6: ; KNL_32-NEXT: .Lcfi8:
; KNL_32-NEXT: .cfi_def_cfa_offset 8 ; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .Lcfi7: ; KNL_32-NEXT: .Lcfi9:
; KNL_32-NEXT: .cfi_offset %ebp, -8 ; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp ; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .Lcfi8: ; KNL_32-NEXT: .Lcfi10:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp ; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp ; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp ; KNL_32-NEXT: subl $64, %esp
@ -2008,6 +2016,8 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
; KNL_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} ; KNL_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2}
; KNL_32-NEXT: movl %ebp, %esp ; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp ; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .Lcfi11:
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl ; KNL_32-NEXT: retl
; ;
@ -2025,12 +2035,12 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
; SKX_32-LABEL: test_scatter_16i64: ; SKX_32-LABEL: test_scatter_16i64:
; SKX_32: # BB#0: ; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp ; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .Lcfi7: ; SKX_32-NEXT: .Lcfi10:
; SKX_32-NEXT: .cfi_def_cfa_offset 8 ; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .Lcfi8: ; SKX_32-NEXT: .Lcfi11:
; SKX_32-NEXT: .cfi_offset %ebp, -8 ; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp ; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .Lcfi9: ; SKX_32-NEXT: .Lcfi12:
; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp ; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp ; SKX_32-NEXT: subl $64, %esp
@ -2044,6 +2054,8 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} ; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2}
; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .Lcfi13:
; SKX_32-NEXT: .cfi_def_cfa %esp, 4
; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl ; SKX_32-NEXT: retl
call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask) call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
@ -2111,12 +2123,12 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
; KNL_32-LABEL: test_scatter_16f64: ; KNL_32-LABEL: test_scatter_16f64:
; KNL_32: # BB#0: ; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp ; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .Lcfi9: ; KNL_32-NEXT: .Lcfi12:
; KNL_32-NEXT: .cfi_def_cfa_offset 8 ; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .Lcfi10: ; KNL_32-NEXT: .Lcfi13:
; KNL_32-NEXT: .cfi_offset %ebp, -8 ; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp ; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .Lcfi11: ; KNL_32-NEXT: .Lcfi14:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp ; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp ; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp ; KNL_32-NEXT: subl $64, %esp
@ -2130,6 +2142,8 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} ; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
; KNL_32-NEXT: movl %ebp, %esp ; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp ; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .Lcfi15:
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl ; KNL_32-NEXT: retl
; ;
@ -2147,12 +2161,12 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
; SKX_32-LABEL: test_scatter_16f64: ; SKX_32-LABEL: test_scatter_16f64:
; SKX_32: # BB#0: ; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp ; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .Lcfi10: ; SKX_32-NEXT: .Lcfi14:
; SKX_32-NEXT: .cfi_def_cfa_offset 8 ; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .Lcfi11: ; SKX_32-NEXT: .Lcfi15:
; SKX_32-NEXT: .cfi_offset %ebp, -8 ; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp ; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .Lcfi12: ; SKX_32-NEXT: .Lcfi16:
; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp ; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp ; SKX_32-NEXT: subl $64, %esp
@ -2166,6 +2180,8 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} ; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .Lcfi17:
; SKX_32-NEXT: .cfi_def_cfa %esp, 4
; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl ; SKX_32-NEXT: retl
call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask) call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
@ -2192,12 +2208,12 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6
; KNL_32-LABEL: test_pr28312: ; KNL_32-LABEL: test_pr28312:
; KNL_32: # BB#0: ; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp ; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .Lcfi12: ; KNL_32-NEXT: .Lcfi16:
; KNL_32-NEXT: .cfi_def_cfa_offset 8 ; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .Lcfi13: ; KNL_32-NEXT: .Lcfi17:
; KNL_32-NEXT: .cfi_offset %ebp, -8 ; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp ; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .Lcfi14: ; KNL_32-NEXT: .Lcfi18:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp ; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-32, %esp ; KNL_32-NEXT: andl $-32, %esp
; KNL_32-NEXT: subl $32, %esp ; KNL_32-NEXT: subl $32, %esp
@ -2215,6 +2231,8 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6
; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; KNL_32-NEXT: movl %ebp, %esp ; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp ; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .Lcfi19:
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
; KNL_32-NEXT: retl ; KNL_32-NEXT: retl
; ;
; SKX-LABEL: test_pr28312: ; SKX-LABEL: test_pr28312:
@ -2229,12 +2247,12 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6
; SKX_32-LABEL: test_pr28312: ; SKX_32-LABEL: test_pr28312:
; SKX_32: # BB#0: ; SKX_32: # BB#0:
; SKX_32-NEXT: pushl %ebp ; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .Lcfi13: ; SKX_32-NEXT: .Lcfi18:
; SKX_32-NEXT: .cfi_def_cfa_offset 8 ; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .Lcfi14: ; SKX_32-NEXT: .Lcfi19:
; SKX_32-NEXT: .cfi_offset %ebp, -8 ; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp ; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .Lcfi15: ; SKX_32-NEXT: .Lcfi20:
; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-32, %esp ; SKX_32-NEXT: andl $-32, %esp
; SKX_32-NEXT: subl $32, %esp ; SKX_32-NEXT: subl $32, %esp
@ -2245,6 +2263,8 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6
; SKX_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ; SKX_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .Lcfi21:
; SKX_32-NEXT: .cfi_def_cfa %esp, 4
; SKX_32-NEXT: retl ; SKX_32-NEXT: retl
%g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
%g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)

View File

@ -149,6 +149,8 @@ define void @memset_256_nonzero_bytes(i8* %x) {
; SSE-NEXT: movl $256, %edx # imm = 0x100 ; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: callq memset ; SSE-NEXT: callq memset
; SSE-NEXT: popq %rax ; SSE-NEXT: popq %rax
; SSE-NEXT: .Lcfi1:
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; SSE2FAST-LABEL: memset_256_nonzero_bytes: ; SSE2FAST-LABEL: memset_256_nonzero_bytes:

View File

@ -76,7 +76,11 @@ define <2 x i64> @merge_2i64_i64_12(i64* %ptr) nounwind uwtable noinline ssp {
; X32-SSE1-NEXT: movl %esi, 4(%eax) ; X32-SSE1-NEXT: movl %esi, 4(%eax)
; X32-SSE1-NEXT: movl %edx, (%eax) ; X32-SSE1-NEXT: movl %edx, (%eax)
; X32-SSE1-NEXT: popl %esi ; X32-SSE1-NEXT: popl %esi
; X32-SSE1-NEXT: .Lcfi4:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: popl %edi ; X32-SSE1-NEXT: popl %edi
; X32-SSE1-NEXT: .Lcfi5:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 4
; X32-SSE1-NEXT: retl $4 ; X32-SSE1-NEXT: retl $4
; ;
; X32-SSE41-LABEL: merge_2i64_i64_12: ; X32-SSE41-LABEL: merge_2i64_i64_12:
@ -377,9 +381,9 @@ define <4 x i32> @merge_4i32_i32_23u5(i32* %ptr) nounwind uwtable noinline ssp {
; X32-SSE1-LABEL: merge_4i32_i32_23u5: ; X32-SSE1-LABEL: merge_4i32_i32_23u5:
; X32-SSE1: # BB#0: ; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %esi ; X32-SSE1-NEXT: pushl %esi
; X32-SSE1-NEXT: .Lcfi4: ; X32-SSE1-NEXT: .Lcfi6:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: .Lcfi5: ; X32-SSE1-NEXT: .Lcfi7:
; X32-SSE1-NEXT: .cfi_offset %esi, -8 ; X32-SSE1-NEXT: .cfi_offset %esi, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
@ -390,6 +394,8 @@ define <4 x i32> @merge_4i32_i32_23u5(i32* %ptr) nounwind uwtable noinline ssp {
; X32-SSE1-NEXT: movl %edx, (%eax) ; X32-SSE1-NEXT: movl %edx, (%eax)
; X32-SSE1-NEXT: movl %ecx, 12(%eax) ; X32-SSE1-NEXT: movl %ecx, 12(%eax)
; X32-SSE1-NEXT: popl %esi ; X32-SSE1-NEXT: popl %esi
; X32-SSE1-NEXT: .Lcfi8:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 4
; X32-SSE1-NEXT: retl $4 ; X32-SSE1-NEXT: retl $4
; ;
; X32-SSE41-LABEL: merge_4i32_i32_23u5: ; X32-SSE41-LABEL: merge_4i32_i32_23u5:
@ -527,24 +533,24 @@ define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_8i16_i16_23u567u9: ; X32-SSE1-LABEL: merge_8i16_i16_23u567u9:
; X32-SSE1: # BB#0: ; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %ebp ; X32-SSE1-NEXT: pushl %ebp
; X32-SSE1-NEXT: .Lcfi6: ; X32-SSE1-NEXT: .Lcfi9:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %ebx ; X32-SSE1-NEXT: pushl %ebx
; X32-SSE1-NEXT: .Lcfi7: ; X32-SSE1-NEXT: .Lcfi10:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
; X32-SSE1-NEXT: pushl %edi ; X32-SSE1-NEXT: pushl %edi
; X32-SSE1-NEXT: .Lcfi8: ; X32-SSE1-NEXT: .Lcfi11:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 16 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 16
; X32-SSE1-NEXT: pushl %esi ; X32-SSE1-NEXT: pushl %esi
; X32-SSE1-NEXT: .Lcfi9:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 20
; X32-SSE1-NEXT: .Lcfi10:
; X32-SSE1-NEXT: .cfi_offset %esi, -20
; X32-SSE1-NEXT: .Lcfi11:
; X32-SSE1-NEXT: .cfi_offset %edi, -16
; X32-SSE1-NEXT: .Lcfi12: ; X32-SSE1-NEXT: .Lcfi12:
; X32-SSE1-NEXT: .cfi_offset %ebx, -12 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 20
; X32-SSE1-NEXT: .Lcfi13: ; X32-SSE1-NEXT: .Lcfi13:
; X32-SSE1-NEXT: .cfi_offset %esi, -20
; X32-SSE1-NEXT: .Lcfi14:
; X32-SSE1-NEXT: .cfi_offset %edi, -16
; X32-SSE1-NEXT: .Lcfi15:
; X32-SSE1-NEXT: .cfi_offset %ebx, -12
; X32-SSE1-NEXT: .Lcfi16:
; X32-SSE1-NEXT: .cfi_offset %ebp, -8 ; X32-SSE1-NEXT: .cfi_offset %ebp, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
@ -561,9 +567,17 @@ define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline s
; X32-SSE1-NEXT: movw %dx, (%eax) ; X32-SSE1-NEXT: movw %dx, (%eax)
; X32-SSE1-NEXT: movw %di, 6(%eax) ; X32-SSE1-NEXT: movw %di, 6(%eax)
; X32-SSE1-NEXT: popl %esi ; X32-SSE1-NEXT: popl %esi
; X32-SSE1-NEXT: .Lcfi17:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 16
; X32-SSE1-NEXT: popl %edi ; X32-SSE1-NEXT: popl %edi
; X32-SSE1-NEXT: .Lcfi18:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
; X32-SSE1-NEXT: popl %ebx ; X32-SSE1-NEXT: popl %ebx
; X32-SSE1-NEXT: .Lcfi19:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: popl %ebp ; X32-SSE1-NEXT: popl %ebp
; X32-SSE1-NEXT: .Lcfi20:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 4
; X32-SSE1-NEXT: retl $4 ; X32-SSE1-NEXT: retl $4
; ;
; X32-SSE41-LABEL: merge_8i16_i16_23u567u9: ; X32-SSE41-LABEL: merge_8i16_i16_23u567u9:
@ -641,9 +655,9 @@ define <8 x i16> @merge_8i16_i16_45u7zzzz(i16* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_8i16_i16_45u7zzzz: ; X32-SSE1-LABEL: merge_8i16_i16_45u7zzzz:
; X32-SSE1: # BB#0: ; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %esi ; X32-SSE1-NEXT: pushl %esi
; X32-SSE1-NEXT: .Lcfi14: ; X32-SSE1-NEXT: .Lcfi21:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: .Lcfi15: ; X32-SSE1-NEXT: .Lcfi22:
; X32-SSE1-NEXT: .cfi_offset %esi, -8 ; X32-SSE1-NEXT: .cfi_offset %esi, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
@ -658,6 +672,8 @@ define <8 x i16> @merge_8i16_i16_45u7zzzz(i16* %ptr) nounwind uwtable noinline s
; X32-SSE1-NEXT: movw $0, 10(%eax) ; X32-SSE1-NEXT: movw $0, 10(%eax)
; X32-SSE1-NEXT: movw $0, 8(%eax) ; X32-SSE1-NEXT: movw $0, 8(%eax)
; X32-SSE1-NEXT: popl %esi ; X32-SSE1-NEXT: popl %esi
; X32-SSE1-NEXT: .Lcfi23:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 4
; X32-SSE1-NEXT: retl $4 ; X32-SSE1-NEXT: retl $4
; ;
; X32-SSE41-LABEL: merge_8i16_i16_45u7zzzz: ; X32-SSE41-LABEL: merge_8i16_i16_45u7zzzz:
@ -695,12 +711,12 @@ define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(i8* %ptr) nounwind uwtable noin
; X32-SSE1-LABEL: merge_16i8_i8_01u3456789ABCDuF: ; X32-SSE1-LABEL: merge_16i8_i8_01u3456789ABCDuF:
; X32-SSE1: # BB#0: ; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %ebx ; X32-SSE1-NEXT: pushl %ebx
; X32-SSE1-NEXT: .Lcfi16: ; X32-SSE1-NEXT: .Lcfi24:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: subl $12, %esp ; X32-SSE1-NEXT: subl $12, %esp
; X32-SSE1-NEXT: .Lcfi17: ; X32-SSE1-NEXT: .Lcfi25:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 20 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 20
; X32-SSE1-NEXT: .Lcfi18: ; X32-SSE1-NEXT: .Lcfi26:
; X32-SSE1-NEXT: .cfi_offset %ebx, -8 ; X32-SSE1-NEXT: .cfi_offset %ebx, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
@ -751,7 +767,11 @@ define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(i8* %ptr) nounwind uwtable noin
; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload ; X32-SSE1-NEXT: movb {{[0-9]+}}(%esp), %cl # 1-byte Reload
; X32-SSE1-NEXT: movb %cl, 3(%eax) ; X32-SSE1-NEXT: movb %cl, 3(%eax)
; X32-SSE1-NEXT: addl $12, %esp ; X32-SSE1-NEXT: addl $12, %esp
; X32-SSE1-NEXT: .Lcfi27:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: popl %ebx ; X32-SSE1-NEXT: popl %ebx
; X32-SSE1-NEXT: .Lcfi28:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 4
; X32-SSE1-NEXT: retl $4 ; X32-SSE1-NEXT: retl $4
; ;
; X32-SSE41-LABEL: merge_16i8_i8_01u3456789ABCDuF: ; X32-SSE41-LABEL: merge_16i8_i8_01u3456789ABCDuF:
@ -868,12 +888,12 @@ define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noin
; X32-SSE1-LABEL: merge_16i8_i8_0123uu67uuuuuzzz: ; X32-SSE1-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
; X32-SSE1: # BB#0: ; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %ebx ; X32-SSE1-NEXT: pushl %ebx
; X32-SSE1-NEXT: .Lcfi19: ; X32-SSE1-NEXT: .Lcfi29:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %eax ; X32-SSE1-NEXT: pushl %eax
; X32-SSE1-NEXT: .Lcfi20: ; X32-SSE1-NEXT: .Lcfi30:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
; X32-SSE1-NEXT: .Lcfi21: ; X32-SSE1-NEXT: .Lcfi31:
; X32-SSE1-NEXT: .cfi_offset %ebx, -8 ; X32-SSE1-NEXT: .cfi_offset %ebx, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
@ -895,7 +915,11 @@ define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noin
; X32-SSE1-NEXT: movb $0, 14(%eax) ; X32-SSE1-NEXT: movb $0, 14(%eax)
; X32-SSE1-NEXT: movb $0, 13(%eax) ; X32-SSE1-NEXT: movb $0, 13(%eax)
; X32-SSE1-NEXT: addl $4, %esp ; X32-SSE1-NEXT: addl $4, %esp
; X32-SSE1-NEXT: .Lcfi32:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: popl %ebx ; X32-SSE1-NEXT: popl %ebx
; X32-SSE1-NEXT: .Lcfi33:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 4
; X32-SSE1-NEXT: retl $4 ; X32-SSE1-NEXT: retl $4
; ;
; X32-SSE41-LABEL: merge_16i8_i8_0123uu67uuuuuzzz: ; X32-SSE41-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
@ -990,14 +1014,14 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin
; X32-SSE1-LABEL: merge_2i64_i64_12_volatile: ; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
; X32-SSE1: # BB#0: ; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: pushl %edi ; X32-SSE1-NEXT: pushl %edi
; X32-SSE1-NEXT: .Lcfi22: ; X32-SSE1-NEXT: .Lcfi34:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: pushl %esi ; X32-SSE1-NEXT: pushl %esi
; X32-SSE1-NEXT: .Lcfi23: ; X32-SSE1-NEXT: .Lcfi35:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 12 ; X32-SSE1-NEXT: .cfi_def_cfa_offset 12
; X32-SSE1-NEXT: .Lcfi24: ; X32-SSE1-NEXT: .Lcfi36:
; X32-SSE1-NEXT: .cfi_offset %esi, -12 ; X32-SSE1-NEXT: .cfi_offset %esi, -12
; X32-SSE1-NEXT: .Lcfi25: ; X32-SSE1-NEXT: .Lcfi37:
; X32-SSE1-NEXT: .cfi_offset %edi, -8 ; X32-SSE1-NEXT: .cfi_offset %edi, -8
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
@ -1010,7 +1034,11 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin
; X32-SSE1-NEXT: movl %esi, 4(%eax) ; X32-SSE1-NEXT: movl %esi, 4(%eax)
; X32-SSE1-NEXT: movl %edx, (%eax) ; X32-SSE1-NEXT: movl %edx, (%eax)
; X32-SSE1-NEXT: popl %esi ; X32-SSE1-NEXT: popl %esi
; X32-SSE1-NEXT: .Lcfi38:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 8
; X32-SSE1-NEXT: popl %edi ; X32-SSE1-NEXT: popl %edi
; X32-SSE1-NEXT: .Lcfi39:
; X32-SSE1-NEXT: .cfi_def_cfa_offset 4
; X32-SSE1-NEXT: retl $4 ; X32-SSE1-NEXT: retl $4
; ;
; X32-SSE41-LABEL: merge_2i64_i64_12_volatile: ; X32-SSE41-LABEL: merge_2i64_i64_12_volatile:

View File

@ -376,8 +376,10 @@ entry:
; LINUX: pushl $1 ; LINUX: pushl $1
; LINUX: .cfi_adjust_cfa_offset 4 ; LINUX: .cfi_adjust_cfa_offset 4
; LINUX: calll good ; LINUX: calll good
; LINUX: addl $28, %esp ; LINUX: addl $16, %esp
; LINUX: .cfi_adjust_cfa_offset -16 ; LINUX: .cfi_adjust_cfa_offset -16
; LINUX: addl $12, %esp
; LINUX: .cfi_def_cfa_offset 4
; LINUX-NOT: add ; LINUX-NOT: add
; LINUX: retl ; LINUX: retl
define void @pr27140() optsize { define void @pr27140() optsize {

File diff suppressed because it is too large Load Diff

View File

@ -193,6 +193,8 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 {
; X32-NEXT: popl %edi ; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp ; X32-NEXT: popl %ebp
; X32-NEXT: .Lcfi6:
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl ; X32-NEXT: retl
; ;
; X64-LABEL: test: ; X64-LABEL: test:
@ -267,8 +269,14 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 {
; X64-NEXT: movq %rax, 16(%r9) ; X64-NEXT: movq %rax, 16(%r9)
; X64-NEXT: movq %rdx, 24(%r9) ; X64-NEXT: movq %rdx, 24(%r9)
; X64-NEXT: popq %rbx ; X64-NEXT: popq %rbx
; X64-NEXT: .Lcfi6:
; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: popq %r14 ; X64-NEXT: popq %r14
; X64-NEXT: .Lcfi7:
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: popq %r15 ; X64-NEXT: popq %r15
; X64-NEXT: .Lcfi8:
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq ; X64-NEXT: retq
entry: entry:
%av = load i256, i256* %a %av = load i256, i256* %a

View File

@ -29,6 +29,8 @@ define void @func(<4 x float> %vx) {
; CHECK-NEXT: leaq stuff+8(%r9), %r9 ; CHECK-NEXT: leaq stuff+8(%r9), %r9
; CHECK-NEXT: callq toto ; CHECK-NEXT: callq toto
; CHECK-NEXT: popq %rax ; CHECK-NEXT: popq %rax
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
entry: entry:
%tmp2 = bitcast <4 x float> %vx to <2 x i64> %tmp2 = bitcast <4 x float> %vx to <2 x i64>

View File

@ -66,6 +66,8 @@ define <4 x float> @bar(<4 x float>* %a1p, <4 x float>* %a2p, <4 x float> %a3, <
; CHECK-NEXT: vaddps {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload ; CHECK-NEXT: vaddps {{[0-9]+}}(%rsp), %xmm1, %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT: addq $88, %rsp ; CHECK-NEXT: addq $88, %rsp
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%a1 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17> %a1 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>

View File

@ -111,6 +111,8 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill ; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp ; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
entry: entry:
%__A.addr.i = alloca float, align 4 %__A.addr.i = alloca float, align 4

View File

@ -54,7 +54,11 @@ define i32 @_Z3foov() {
; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) ; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl $24, %esp ; CHECK-NEXT: addl $24, %esp
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %esi
; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl ; CHECK-NEXT: retl
entry: entry:
%aa = alloca i16, align 2 %aa = alloca i16, align 2

View File

@ -28,6 +28,8 @@ define void @_Z1av() {
; CHECK-NEXT: andb $1, %al ; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp)
; CHECK-NEXT: addl $2, %esp ; CHECK-NEXT: addl $2, %esp
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl ; CHECK-NEXT: retl
entry: entry:
%b = alloca i8, align 1 %b = alloca i8, align 1

View File

@ -64,9 +64,17 @@ define void @foo() local_unnamed_addr {
; X86-NEXT: imull %eax, %ebx ; X86-NEXT: imull %eax, %ebx
; X86-NEXT: movb %bl, var_218 ; X86-NEXT: movb %bl, var_218
; X86-NEXT: popl %esi ; X86-NEXT: popl %esi
; X86-NEXT: .Lcfi8:
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi ; X86-NEXT: popl %edi
; X86-NEXT: .Lcfi9:
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebx
; X86-NEXT: .Lcfi10:
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp ; X86-NEXT: popl %ebp
; X86-NEXT: .Lcfi11:
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl ; X86-NEXT: retl
; ;
; X64-LABEL: foo: ; X64-LABEL: foo:

View File

@ -90,6 +90,8 @@ define void @foo() {
; 6860-NEXT: popl %edi ; 6860-NEXT: popl %edi
; 6860-NEXT: popl %ebx ; 6860-NEXT: popl %ebx
; 6860-NEXT: popl %ebp ; 6860-NEXT: popl %ebp
; 6860-NEXT: .Lcfi6:
; 6860-NEXT: .cfi_def_cfa %esp, 4
; 6860-NEXT: retl ; 6860-NEXT: retl
; ;
; X64-LABEL: foo: ; X64-LABEL: foo:
@ -136,6 +138,8 @@ define void @foo() {
; 686-NEXT: movb %dl, (%eax) ; 686-NEXT: movb %dl, (%eax)
; 686-NEXT: movl %ebp, %esp ; 686-NEXT: movl %ebp, %esp
; 686-NEXT: popl %ebp ; 686-NEXT: popl %ebp
; 686-NEXT: .Lcfi3:
; 686-NEXT: .cfi_def_cfa %esp, 4
; 686-NEXT: retl ; 686-NEXT: retl
bb: bb:
%tmp = alloca i64, align 8 %tmp = alloca i64, align 8

View File

@ -33,7 +33,11 @@ define i8** @japi1_convert_690(i8**, i8***, i32) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: movl %eax, (%ecx)
; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: popl %ebx ; CHECK-NEXT: popl %ebx
; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl ; CHECK-NEXT: retl
top: top:
%3 = alloca i8*** %3 = alloca i8***

View File

@ -14,4 +14,6 @@ define void @f() {
; CHECK-NEXT: : ; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: popq %rbp ; CHECK-NEXT: popq %rbp
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: ret ; CHECK-NEXT: ret

View File

@ -23,8 +23,10 @@ declare x86_stdcallcc void @stdfoo(i32, i32) #0
; CHECK: .cfi_adjust_cfa_offset 4 ; CHECK: .cfi_adjust_cfa_offset 4
; CHECK: calll stdfoo ; CHECK: calll stdfoo
; CHECK: .cfi_adjust_cfa_offset -8 ; CHECK: .cfi_adjust_cfa_offset -8
; CHECK: addl $20, %esp ; CHECK: addl $8, %esp
; CHECK: .cfi_adjust_cfa_offset -8 ; CHECK: .cfi_adjust_cfa_offset -8
; CHECK: addl $12, %esp
; CHECK: .cfi_def_cfa_offset 4
define void @test1() #0 !dbg !4 { define void @test1() #0 !dbg !4 {
entry: entry:
tail call void @foo(i32 1, i32 2) #1, !dbg !10 tail call void @foo(i32 1, i32 2) #1, !dbg !10

View File

@ -12,7 +12,7 @@
; LINUX-NEXT: ] ; LINUX-NEXT: ]
; LINUX-NEXT: Address: 0x0 ; LINUX-NEXT: Address: 0x0
; LINUX-NEXT: Offset: 0x68 ; LINUX-NEXT: Offset: 0x68
; LINUX-NEXT: Size: 64 ; LINUX-NEXT: Size: 72
; LINUX-NEXT: Link: 0 ; LINUX-NEXT: Link: 0
; LINUX-NEXT: Info: 0 ; LINUX-NEXT: Info: 0
; LINUX-NEXT: AddressAlignment: 4 ; LINUX-NEXT: AddressAlignment: 4
@ -22,8 +22,9 @@
; LINUX-NEXT: SectionData ( ; LINUX-NEXT: SectionData (
; LINUX-NEXT: 0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..|| ; LINUX-NEXT: 0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..||
; LINUX-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................| ; LINUX-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................|
; LINUX-NEXT: 0020: 1C000000 24000000 00000000 1D000000 |....$...........| ; LINUX-NEXT: 0020: 24000000 24000000 00000000 1D000000 |$...$...........|
; LINUX-NEXT: 0030: 04000000 00410E08 8502420D 05432E10 |.....A....B..C..| ; LINUX-NEXT: 0030: 04000000 00410E08 8502420D 05432E10 |.....A....B..C..|
; LINUX-NEXT: 0040: 540C0404 410C0508 |T...A...|
; LINUX-NEXT: ) ; LINUX-NEXT: )
declare i32 @__gxx_personality_v0(...) declare i32 @__gxx_personality_v0(...)

View File

@ -82,8 +82,9 @@ cleanup:
; LINUX-NEXT: Lcfi{{[0-9]+}}: ; LINUX-NEXT: Lcfi{{[0-9]+}}:
; LINUX-NEXT: .cfi_adjust_cfa_offset 4 ; LINUX-NEXT: .cfi_adjust_cfa_offset 4
; LINUX-NEXT: call ; LINUX-NEXT: call
; LINUX-NEXT: addl $28, %esp ; LINUX-NEXT: addl $16, %esp
; LINUX: .cfi_adjust_cfa_offset -16 ; LINUX: .cfi_adjust_cfa_offset -16
; LINUX: addl $12, %esp
; DARWIN-NOT: .cfi_escape ; DARWIN-NOT: .cfi_escape
; DARWIN-NOT: pushl ; DARWIN-NOT: pushl
define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {

View File

@ -106,6 +106,8 @@ entry:
; CHECK: call ; CHECK: call
; CHECK-NEXT: movzbl ; CHECK-NEXT: movzbl
; CHECK-NEXT: {{pop|add}} ; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset {{4|8}}
; CHECK-NEXT: ret ; CHECK-NEXT: ret
} }
@ -120,6 +122,8 @@ entry:
; CHECK: call ; CHECK: call
; CHECK-NEXT: movzbl ; CHECK-NEXT: movzbl
; CHECK-NEXT: {{pop|add}} ; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset {{4|8}}
; CHECK-NEXT: ret ; CHECK-NEXT: ret
} }
@ -134,5 +138,7 @@ entry:
; CHECK: call ; CHECK: call
; CHECK-NEXT: movzwl ; CHECK-NEXT: movzwl
; CHECK-NEXT: {{pop|add}} ; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset {{4|8}}
; CHECK-NEXT: ret ; CHECK-NEXT: ret
} }

View File

@ -76,6 +76,8 @@ define void @f2(i32 %x) nounwind uwtable {
; X64-NEXT: xabort $1 ; X64-NEXT: xabort $1
; X64-NEXT: callq f1 ; X64-NEXT: callq f1
; X64-NEXT: popq %rax ; X64-NEXT: popq %rax
; X64-NEXT: .Lcfi1:
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq ; X64-NEXT: retq
entry: entry:
%x.addr = alloca i32, align 4 %x.addr = alloca i32, align 4

View File

@ -92,6 +92,8 @@ define void @pr26232(i64 %a, <16 x i1> %b) {
; KNL-32-NEXT: jne .LBB1_1 ; KNL-32-NEXT: jne .LBB1_1
; KNL-32-NEXT: # BB#2: # %for_exit600 ; KNL-32-NEXT: # BB#2: # %for_exit600
; KNL-32-NEXT: popl %esi ; KNL-32-NEXT: popl %esi
; KNL-32-NEXT: .Lcfi2:
; KNL-32-NEXT: .cfi_def_cfa_offset 4
; KNL-32-NEXT: retl ; KNL-32-NEXT: retl
allocas: allocas:
br label %for_test11.preheader br label %for_test11.preheader

View File

@ -83,6 +83,8 @@ define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
; CHECK: callq return_i1 ; CHECK: callq return_i1
; CHECK-NEXT: .Ltmp5: ; CHECK-NEXT: .Ltmp5:
; CHECK-NEXT: popq %rcx ; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .Lcfi11:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
entry: entry:
%safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a) %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)

View File

@ -69,6 +69,8 @@ define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
; CHECK: callq return_i1 ; CHECK: callq return_i1
; CHECK-NEXT: .Ltmp4: ; CHECK-NEXT: .Ltmp4:
; CHECK-NEXT: popq %rcx ; CHECK-NEXT: popq %rcx
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
entry: entry:
%safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0, i32 addrspace(1)* %a) %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0, i32 addrspace(1)* %a)

View File

@ -142,6 +142,8 @@ normal_return:
; CHECK-LABEL: %normal_return ; CHECK-LABEL: %normal_return
; CHECK: xorl %eax, %eax ; CHECK: xorl %eax, %eax
; CHECK-NEXT: popq ; CHECK-NEXT: popq
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 13, i32 13) %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 13, i32 13)
%undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 14, i32 14) %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 14, i32 14)
@ -169,6 +171,8 @@ entry:
normal_return: normal_return:
; CHECK: leaq ; CHECK: leaq
; CHECK-NEXT: popq ; CHECK-NEXT: popq
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %sp, i32 13, i32 13) %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %sp, i32 13, i32 13)
%aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)* %aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)*
@ -177,6 +181,8 @@ normal_return:
exceptional_return: exceptional_return:
; CHECK: movl $15 ; CHECK: movl $15
; CHECK-NEXT: popq ; CHECK-NEXT: popq
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%landing_pad = landingpad token %landing_pad = landingpad token
cleanup cleanup

View File

@ -0,0 +1,103 @@
; RUN: llc %s -o - | FileCheck %s
; ModuleID = 'throws-cfi-fp.cpp'
source_filename = "throws-cfi-fp.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
$__clang_call_terminate = comdat any
@_ZL11ShouldThrow = internal unnamed_addr global i1 false, align 1
@_ZTIi = external constant i8*
@str = private unnamed_addr constant [20 x i8] c"Threw an exception!\00"
; Function Attrs: uwtable
define void @_Z6throwsv() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: _Z6throwsv:
; CHECK: popq %rbp
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa %rbp, 16
entry:
%.b5 = load i1, i1* @_ZL11ShouldThrow, align 1
br i1 %.b5, label %if.then, label %try.cont
if.then: ; preds = %entry
%exception = tail call i8* @__cxa_allocate_exception(i64 4)
%0 = bitcast i8* %exception to i32*
store i32 1, i32* %0, align 16
invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
to label %unreachable unwind label %lpad
lpad: ; preds = %if.then
%1 = landingpad { i8*, i32 }
catch i8* null
%2 = extractvalue { i8*, i32 } %1, 0
%3 = tail call i8* @__cxa_begin_catch(i8* %2)
%puts = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @str, i64 0, i64 0))
invoke void @__cxa_rethrow() #4
to label %unreachable unwind label %lpad1
lpad1: ; preds = %lpad
%4 = landingpad { i8*, i32 }
cleanup
invoke void @__cxa_end_catch()
to label %eh.resume unwind label %terminate.lpad
try.cont: ; preds = %entry
ret void
eh.resume: ; preds = %lpad1
resume { i8*, i32 } %4
terminate.lpad: ; preds = %lpad1
%5 = landingpad { i8*, i32 }
catch i8* null
%6 = extractvalue { i8*, i32 } %5, 0
tail call void @__clang_call_terminate(i8* %6) #5
unreachable
unreachable: ; preds = %lpad, %if.then
unreachable
}
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
declare i8* @__cxa_begin_catch(i8*)
declare void @__cxa_rethrow()
declare void @__cxa_end_catch()
; Function Attrs: noinline noreturn nounwind
declare void @__clang_call_terminate(i8*)
declare void @_ZSt9terminatev()
; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly) #3
attributes #0 = { "no-frame-pointer-elim"="true" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8, !9}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (http://llvm.org/git/clang.git 3f8116e6a2815b1d5f3491493938d0c63c9f42c9) (http://llvm.org/git/llvm.git 4fde77f8f1a8e4482e69b6a7484bc7d1b99b3c0a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3)
!1 = !DIFile(filename: "throws-cfi-fp.cpp", directory: "epilogue-dwarf/test")
!2 = !{}
!3 = !{!4}
!4 = !DIGlobalVariableExpression(var: !5)
!5 = distinct !DIGlobalVariable(name: "ShouldThrow", linkageName: "_ZL11ShouldThrow", scope: !0, file: !1, line: 2, type: !6, isLocal: true, isDefinition: true)
!6 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"wchar_size", i32 4}

View File

@ -0,0 +1,102 @@
; RUN: llc %s -o - | FileCheck %s
; ModuleID = 'throws-cfi-no-fp.cpp'
source_filename = "throws-cfi-no-fp.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
$__clang_call_terminate = comdat any
@_ZL11ShouldThrow = internal unnamed_addr global i1 false, align 1
@_ZTIi = external constant i8*
@str = private unnamed_addr constant [20 x i8] c"Threw an exception!\00"
; Function Attrs: uwtable
define void @_Z6throwsv() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: _Z6throwsv:
; CHECK: popq %rbx
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 16
entry:
%.b5 = load i1, i1* @_ZL11ShouldThrow, align 1
br i1 %.b5, label %if.then, label %try.cont
if.then: ; preds = %entry
%exception = tail call i8* @__cxa_allocate_exception(i64 4)
%0 = bitcast i8* %exception to i32*
store i32 1, i32* %0, align 16
invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
to label %unreachable unwind label %lpad
lpad: ; preds = %if.then
%1 = landingpad { i8*, i32 }
catch i8* null
%2 = extractvalue { i8*, i32 } %1, 0
%3 = tail call i8* @__cxa_begin_catch(i8* %2)
%puts = tail call i32 @puts(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @str, i64 0, i64 0))
invoke void @__cxa_rethrow() #4
to label %unreachable unwind label %lpad1
lpad1: ; preds = %lpad
%4 = landingpad { i8*, i32 }
cleanup
invoke void @__cxa_end_catch()
to label %eh.resume unwind label %terminate.lpad
try.cont: ; preds = %entry
ret void
eh.resume: ; preds = %lpad1
resume { i8*, i32 } %4
terminate.lpad: ; preds = %lpad1
%5 = landingpad { i8*, i32 }
catch i8* null
%6 = extractvalue { i8*, i32 } %5, 0
tail call void @__clang_call_terminate(i8* %6)
unreachable
unreachable: ; preds = %lpad, %if.then
unreachable
}
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
declare i8* @__cxa_begin_catch(i8*)
declare void @__cxa_rethrow()
declare void @__cxa_end_catch()
; Function Attrs: noinline noreturn nounwind
declare void @__clang_call_terminate(i8*)
declare void @_ZSt9terminatev()
; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8, !9}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (http://llvm.org/git/clang.git 3f8116e6a2815b1d5f3491493938d0c63c9f42c9) (http://llvm.org/git/llvm.git 4fde77f8f1a8e4482e69b6a7484bc7d1b99b3c0a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3)
!1 = !DIFile(filename: "throws-cfi-no-fp.cpp", directory: "epilogue-dwarf/test")
!2 = !{}
!3 = !{!4}
!4 = !DIGlobalVariableExpression(var: !5)
!5 = distinct !DIGlobalVariable(name: "ShouldThrow", linkageName: "_ZL11ShouldThrow", scope: !0, file: !1, line: 2, type: !6, isLocal: true, isDefinition: true)
!6 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"wchar_size", i32 4}

View File

@ -3345,11 +3345,23 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; AVX1-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 ; AVX1-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: popq %rbx ; AVX1-NEXT: popq %rbx
; AVX1-NEXT: .Lcfi12:
; AVX1-NEXT: .cfi_def_cfa_offset 48
; AVX1-NEXT: popq %r12 ; AVX1-NEXT: popq %r12
; AVX1-NEXT: .Lcfi13:
; AVX1-NEXT: .cfi_def_cfa_offset 40
; AVX1-NEXT: popq %r13 ; AVX1-NEXT: popq %r13
; AVX1-NEXT: .Lcfi14:
; AVX1-NEXT: .cfi_def_cfa_offset 32
; AVX1-NEXT: popq %r14 ; AVX1-NEXT: popq %r14
; AVX1-NEXT: .Lcfi15:
; AVX1-NEXT: .cfi_def_cfa_offset 24
; AVX1-NEXT: popq %r15 ; AVX1-NEXT: popq %r15
; AVX1-NEXT: .Lcfi16:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: popq %rbp ; AVX1-NEXT: popq %rbp
; AVX1-NEXT: .Lcfi17:
; AVX1-NEXT: .cfi_def_cfa_offset 8
; AVX1-NEXT: retq ; AVX1-NEXT: retq
; ;
; AVX2-LABEL: load_sext_16i1_to_16i16: ; AVX2-LABEL: load_sext_16i1_to_16i16:
@ -3448,11 +3460,23 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; AVX2-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 ; AVX2-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: popq %rbx ; AVX2-NEXT: popq %rbx
; AVX2-NEXT: .Lcfi12:
; AVX2-NEXT: .cfi_def_cfa_offset 48
; AVX2-NEXT: popq %r12 ; AVX2-NEXT: popq %r12
; AVX2-NEXT: .Lcfi13:
; AVX2-NEXT: .cfi_def_cfa_offset 40
; AVX2-NEXT: popq %r13 ; AVX2-NEXT: popq %r13
; AVX2-NEXT: .Lcfi14:
; AVX2-NEXT: .cfi_def_cfa_offset 32
; AVX2-NEXT: popq %r14 ; AVX2-NEXT: popq %r14
; AVX2-NEXT: .Lcfi15:
; AVX2-NEXT: .cfi_def_cfa_offset 24
; AVX2-NEXT: popq %r15 ; AVX2-NEXT: popq %r15
; AVX2-NEXT: .Lcfi16:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: popq %rbp ; AVX2-NEXT: popq %rbp
; AVX2-NEXT: .Lcfi17:
; AVX2-NEXT: .cfi_def_cfa_offset 8
; AVX2-NEXT: retq ; AVX2-NEXT: retq
; ;
; AVX512F-LABEL: load_sext_16i1_to_16i16: ; AVX512F-LABEL: load_sext_16i1_to_16i16:
@ -4849,6 +4873,8 @@ define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 ; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; X32-SSE41-NEXT: movd %xmm0, %eax ; X32-SSE41-NEXT: movd %xmm0, %eax
; X32-SSE41-NEXT: popl %ecx ; X32-SSE41-NEXT: popl %ecx
; X32-SSE41-NEXT: .Lcfi1:
; X32-SSE41-NEXT: .cfi_def_cfa_offset 4
; X32-SSE41-NEXT: retl ; X32-SSE41-NEXT: retl
entry: entry:
%Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>

View File

@ -622,6 +622,8 @@ define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){
; KNL32-NEXT: vpblendvb %ymm3, 8(%ebp), %ymm1, %ymm1 ; KNL32-NEXT: vpblendvb %ymm3, 8(%ebp), %ymm1, %ymm1
; KNL32-NEXT: movl %ebp, %esp ; KNL32-NEXT: movl %ebp, %esp
; KNL32-NEXT: popl %ebp ; KNL32-NEXT: popl %ebp
; KNL32-NEXT: .Lcfi3:
; KNL32-NEXT: .cfi_def_cfa %esp, 4
; KNL32-NEXT: retl ; KNL32-NEXT: retl
entry: entry:
%0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32> <i32 64, i32 1, i32 66, i32 3, i32 68, i32 5, i32 70, i32 7, i32 72, i32 9, i32 74, i32 11, i32 76, i32 13, i32 78, i32 15, i32 80, i32 17, i32 82, i32 19, i32 84, i32 21, i32 86, i32 23, i32 88, i32 25, i32 90, i32 27, i32 92, i32 29, i32 94, i32 31, i32 96, i32 33, i32 98, i32 35, i32 100, i32 37, i32 102, i32 39, i32 104, i32 41, i32 106, i32 43, i32 108, i32 45, i32 110, i32 47, i32 112, i32 49, i32 114, i32 51, i32 116, i32 53, i32 118, i32 55, i32 120, i32 57, i32 122, i32 59, i32 124, i32 61, i32 126, i32 63> %0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32> <i32 64, i32 1, i32 66, i32 3, i32 68, i32 5, i32 70, i32 7, i32 72, i32 9, i32 74, i32 11, i32 76, i32 13, i32 78, i32 15, i32 80, i32 17, i32 82, i32 19, i32 84, i32 21, i32 86, i32 23, i32 88, i32 25, i32 90, i32 27, i32 92, i32 29, i32 94, i32 31, i32 96, i32 33, i32 98, i32 35, i32 100, i32 37, i32 102, i32 39, i32 104, i32 41, i32 106, i32 43, i32 108, i32 45, i32 110, i32 47, i32 112, i32 49, i32 114, i32 51, i32 116, i32 53, i32 118, i32 55, i32 120, i32 57, i32 122, i32 59, i32 124, i32 61, i32 126, i32 63>
@ -652,12 +654,12 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
; KNL32-LABEL: test_mm512_mask_blend_epi16: ; KNL32-LABEL: test_mm512_mask_blend_epi16:
; KNL32: # BB#0: # %entry ; KNL32: # BB#0: # %entry
; KNL32-NEXT: pushl %ebp ; KNL32-NEXT: pushl %ebp
; KNL32-NEXT: .Lcfi3:
; KNL32-NEXT: .cfi_def_cfa_offset 8
; KNL32-NEXT: .Lcfi4: ; KNL32-NEXT: .Lcfi4:
; KNL32-NEXT: .cfi_def_cfa_offset 8
; KNL32-NEXT: .Lcfi5:
; KNL32-NEXT: .cfi_offset %ebp, -8 ; KNL32-NEXT: .cfi_offset %ebp, -8
; KNL32-NEXT: movl %esp, %ebp ; KNL32-NEXT: movl %esp, %ebp
; KNL32-NEXT: .Lcfi5: ; KNL32-NEXT: .Lcfi6:
; KNL32-NEXT: .cfi_def_cfa_register %ebp ; KNL32-NEXT: .cfi_def_cfa_register %ebp
; KNL32-NEXT: andl $-32, %esp ; KNL32-NEXT: andl $-32, %esp
; KNL32-NEXT: subl $32, %esp ; KNL32-NEXT: subl $32, %esp
@ -665,6 +667,8 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
; KNL32-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] ; KNL32-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15]
; KNL32-NEXT: movl %ebp, %esp ; KNL32-NEXT: movl %ebp, %esp
; KNL32-NEXT: popl %ebp ; KNL32-NEXT: popl %ebp
; KNL32-NEXT: .Lcfi7:
; KNL32-NEXT: .cfi_def_cfa %esp, 4
; KNL32-NEXT: retl ; KNL32-NEXT: retl
entry: entry:
%0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31> %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>

View File

@ -445,6 +445,8 @@ define i64 @shuf64i1_zero(i64 %a) {
; AVX512F-NEXT: orq %rcx, %rax ; AVX512F-NEXT: orq %rcx, %rax
; AVX512F-NEXT: movq %rbp, %rsp ; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: .Lcfi3:
; AVX512F-NEXT: .cfi_def_cfa %rsp, 8
; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;

View File

@ -107,10 +107,16 @@ define i32 @test_wide(i128 %a, i128 %b) {
; CHECK-NEXT: # BB#1: # %bb1 ; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %esi
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl ; CHECK-NEXT: retl
; CHECK-NEXT: .LBB4_2: # %bb2 ; CHECK-NEXT: .LBB4_2: # %bb2
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: movl $2, %eax ; CHECK-NEXT: movl $2, %eax
; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %esi
; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl ; CHECK-NEXT: retl
entry: entry:
%cmp = icmp slt i128 %a, %b %cmp = icmp slt i128 %a, %b

View File

@ -6,6 +6,8 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK: pushq ; CHECK: pushq
; CHECK: ud2 ; CHECK: ud2
; CHECK-NEXT: popq ; CHECK-NEXT: popq
; CHECK-NEXT: :
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
define void @bar() { define void @bar() {
entry: entry:

View File

@ -23,6 +23,8 @@ define x86_64_sysvcc i32 @bar(i32 %a0, i32 %a1, float %b0) #0 {
; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload ; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: popq %rdx ; CHECK-NEXT: popq %rdx
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq ; CHECK-NEXT: retq
call void asm sideeffect "", "~{rax},~{rdx},~{xmm1},~{rdi},~{rsi},~{xmm0}"() call void asm sideeffect "", "~{rax},~{rdx},~{xmm1},~{rdi},~{rsi},~{xmm0}"()
ret i32 4 ret i32 4