X86: Use push-pop for materializing 8-bit immediates for minsize (take 2)
This is the same as r255936, with added logic for avoiding clobbering of the red zone (PR26023).

Differential Revision: http://reviews.llvm.org/D18246

llvm-svn: 264375
Commit: 9fe6bf47fd (parent: bfd4cf42ec)
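For context, a minimal sketch of what the change buys at minsize (not part of the commit; the function below mirrors the minus_five32 case in the new materialize.ll test, assuming the same i686 llc invocation as its RUN line): a mov of a 32-bit immediate takes 5 bytes, while pushing a sign-extended 8-bit immediate and popping it into the register takes 3, so the new pseudos expand to push/pop unless the function uses the red zone or is Win64 without a frame pointer.

; Illustrative sketch only -- assumes llc -mtriple=i686-unknown-linux-gnu, as in
; the RUN lines of the new test.
define i32 @minus_five32() minsize {
entry:
  ret i32 -5
; Before this patch:  movl $-5, %eax   ; 5 bytes (b8 fb ff ff ff)
; After this patch:   pushl $-5        ; 2 bytes (6a fb)
;                     popl %eax        ; 1 byte  (58)
}

For 1 and -1, the existing xor+inc/dec pseudos are still preferred on 32-bit; their AddedComplexity is raised to 15, above the new pseudos' 10.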
@@ -958,6 +958,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       !MF.shouldSplitStack()) { // Regular stack
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (HasFP) MinSize += SlotSize;
+    X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
     StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
     MFI->setStackSize(StackSize);
   }
@@ -157,9 +157,13 @@ namespace {
     /// performance.
     bool OptForSize;

+    /// If true, selector should try to optimize for minimum code size.
+    bool OptForMinSize;
+
   public:
     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
-        : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
+        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
+          OptForMinSize(false) {}

     const char *getPassName() const override {
       return "X86 DAG->DAG Instruction Selection";
@@ -530,8 +534,10 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
 }

 void X86DAGToDAGISel::PreprocessISelDAG() {
-  // OptForSize is used in pattern predicates that isel is matching.
+  // OptFor[Min]Size are used in pattern predicates that isel is matching.
   OptForSize = MF->getFunction()->optForSize();
+  OptForMinSize = MF->getFunction()->optForMinSize();
+  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");

   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
        E = CurDAG->allnodes_end(); I != E; ) {
@@ -250,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
 // Alias instruction mapping movr0 to xor.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isPseudo = 1 in
+    isPseudo = 1, AddedComplexity = 20 in
 def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                 [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;

@@ -263,7 +263,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
 }

 let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
-    AddedComplexity = 1 in {
+    AddedComplexity = 15 in {
   // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
   // which only require 3 bytes compared to MOV32ri which requires 5.
   let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
@@ -278,6 +278,17 @@ let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
   def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
 }

+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
+// FIXME: Add itinerary class and Schedule.
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
+                       [(set GR32:$dst, i32immSExt8:$src)]>,
+                       Requires<[OptForMinSize, NotWin64WithoutFP]>;
+def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
+                       [(set GR64:$dst, i64immSExt8:$src)]>,
+                       Requires<[OptForMinSize, NotWin64WithoutFP]>;
+}
+
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
 // that would make it more difficult to rematerialize.
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -5391,6 +5392,60 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
   return true;
 }

+bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
+  MachineBasicBlock &MBB = *MIB->getParent();
+  DebugLoc DL = MIB->getDebugLoc();
+  int64_t Imm = MIB->getOperand(1).getImm();
+  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
+  MachineBasicBlock::iterator I = MIB.getInstr();
+
+  int StackAdjustment;
+
+  if (Subtarget.is64Bit()) {
+    assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
+           MIB->getOpcode() == X86::MOV32ImmSExti8);
+
+    // Can't use push/pop lowering if the function might write to the red zone.
+    X86MachineFunctionInfo *X86FI =
+        MBB.getParent()->getInfo<X86MachineFunctionInfo>();
+    if (X86FI->getUsesRedZone()) {
+      MIB->setDesc(get(MIB->getOpcode() == X86::MOV32ImmSExti8 ? X86::MOV32ri
+                                                               : X86::MOV64ri));
+      return true;
+    }
+
+    // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
+    // widen the register if necessary.
+    StackAdjustment = 8;
+    BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
+    MIB->setDesc(get(X86::POP64r));
+    MIB->getOperand(0)
+        .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
+  } else {
+    assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
+    StackAdjustment = 4;
+    BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
+    MIB->setDesc(get(X86::POP32r));
+  }
+
+  // Build CFI if necessary.
+  MachineFunction &MF = *MBB.getParent();
+  const X86FrameLowering *TFL = Subtarget.getFrameLowering();
+  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+  bool NeedsDwarfCFI =
+      !IsWin64Prologue &&
+      (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
+  bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
+  if (EmitCFI) {
+    TFL->BuildCFI(MBB, I, DL,
+        MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
+    TFL->BuildCFI(MBB, std::next(I), DL,
+        MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
+  }
+
+  return true;
+}
+
 // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
 // code sequence is needed for other targets.
 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -5423,6 +5478,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
     return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
   case X86::MOV32r_1:
     return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
+  case X86::MOV32ImmSExti8:
+  case X86::MOV64ImmSExti8:
+    return ExpandMOVImmSExti8(MIB);
   case X86::SETB_C8r:
     return Expand2AddrUndef(MIB, get(X86::SBB8rr));
   case X86::SETB_C16r:
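A rough sketch of the resulting machine code on 64-bit targets (again not part of the diff; it restates what the one64_minsize case in the new materialize.ll test below checks): when there is no frame pointer and DWARF CFI is required, the expansion brackets the push/pop with the .cfi_adjust_cfa_offset directives emitted by the BuildCFI calls above.

; Illustrative sketch only -- assumes llc -mtriple=x86_64-unknown-linux-gnu.
define i64 @one64_minsize() minsize {
entry:
  ret i64 1
; Expected lowering of the MOV64ImmSExti8 pseudo:
;   pushq $1
;   .cfi_adjust_cfa_offset 8
;   popq %rax
;   .cfi_adjust_cfa_offset -8
;   retq
}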
@@ -23,6 +23,7 @@
 #include "X86GenInstrInfo.inc"

 namespace llvm {
+  class MachineInstrBuilder;
   class X86RegisterInfo;
   class X86Subtarget;

@@ -564,6 +565,9 @@ private:
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
                       int &FrameIndex) const;
+
+  /// Expand the MOVImmSExti8 pseudo-instructions.
+  bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
 };

 } // End llvm namespace
@@ -865,6 +865,8 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">,
                   AssemblerPredicate<"Mode32Bit", "32-bit mode">;
 def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
 def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
+                                  "Subtarget->getFrameLowering()->hasFP(*MF)">;
 def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
 def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
 def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -878,6 +880,7 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
 def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
 def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
 def OptForSize : Predicate<"OptForSize">;
+def OptForMinSize : Predicate<"OptForMinSize">;
 def OptForSpeed : Predicate<"!OptForSize">;
 def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
@@ -96,6 +96,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// copies.
   bool IsSplitCSR = false;

+  /// True if this function uses the red zone.
+  bool UsesRedZone = false;
+
 private:
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
@@ -167,6 +170,9 @@ public:

   bool isSplitCSR() const { return IsSplitCSR; }
   void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
+  bool getUsesRedZone() const { return UsesRedZone; }
+  void setUsesRedZone(bool V) { UsesRedZone = V; }
 };

 } // End llvm namespace
@@ -1,100 +0,0 @@
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
-
-define i32 @one32() optsize {
-entry:
-  ret i32 1
-
-; CHECK32-LABEL: one32
-; CHECK32: xorl %eax, %eax
-; CHECK32-NEXT: incl %eax
-; CHECK32-NEXT: ret
-
-; FIXME: Figure out the best approach in 64-bit mode.
-; CHECK64-LABEL: one32
-; CHECK64: movl $1, %eax
-; CHECK64-NEXT: retq
-}
-
-define i32 @minus_one32() optsize {
-entry:
-  ret i32 -1
-
-; CHECK32-LABEL: minus_one32
-; CHECK32: xorl %eax, %eax
-; CHECK32-NEXT: decl %eax
-; CHECK32-NEXT: ret
-}
-
-define i16 @one16() optsize {
-entry:
-  ret i16 1
-
-; CHECK32-LABEL: one16
-; CHECK32: xorl %eax, %eax
-; CHECK32-NEXT: incl %eax
-; CHECK32-NEXT: retl
-}
-
-define i16 @minus_one16() optsize {
-entry:
-  ret i16 -1
-
-; CHECK32-LABEL: minus_one16
-; CHECK32: xorl %eax, %eax
-; CHECK32-NEXT: decl %eax
-; CHECK32-NEXT: retl
-}
-
-define i32 @test_rematerialization() optsize {
-entry:
-  ; Materialize -1 (thiscall forces it into %ecx).
-  tail call x86_thiscallcc void @f(i32 -1)
-
-  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
-  ; spilling it to the stack.
-  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
-
-  ; -1 should be re-materialized here instead of getting spilled above.
-  ret i32 -1
-
-; CHECK32-LABEL: test_rematerialization
-; CHECK32: xorl %ecx, %ecx
-; CHECK32-NEXT: decl %ecx
-; CHECK32: calll
-; CHECK32: xorl %eax, %eax
-; CHECK32-NEXT: decl %eax
-; CHECK32-NOT: %eax
-; CHECK32: retl
-}
-
-define i32 @test_rematerialization2(i32 %x) optsize {
-entry:
-  ; Materialize -1 (thiscall forces it into %ecx).
-  tail call x86_thiscallcc void @f(i32 -1)
-
-  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
-  ; spilling it to the stack.
-  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
-
-  ; Define eflags.
-  %a = icmp ne i32 %x, 123
-  %b = zext i1 %a to i32
-  ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
-  ; It must therefore not use the xor-dec lowering.
-  %c = select i1 %a, i32 %b, i32 -1
-  ret i32 %c
-
-; CHECK32-LABEL: test_rematerialization2
-; CHECK32: xorl %ecx, %ecx
-; CHECK32-NEXT: decl %ecx
-; CHECK32: calll
-; CHECK32: cmpl
-; CHECK32: setne
-; CHECK32-NOT: xorl
-; CHECK32: movl $-1
-; CHECK32: cmov
-; CHECK32: retl
-}
-
-declare x86_thiscallcc void @f(i32)
test/CodeGen/X86/materialize.ll (new file, 214 lines)
@@ -0,0 +1,214 @@
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
+; RUN: llc -mtriple=x86_64-pc-win32 -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECKWIN64
+
+define i32 @one32_nooptsize() {
+entry:
+  ret i32 1
+
+; When not optimizing for size, use mov.
+; CHECK32-LABEL: one32_nooptsize:
+; CHECK32: movl $1, %eax
+; CHECK32-NEXT: retl
+; CHECK64-LABEL: one32_nooptsize:
+; CHECK64: movl $1, %eax
+; CHECK64-NEXT: retq
+}
+
+define i32 @one32() optsize {
+entry:
+  ret i32 1
+
+; CHECK32-LABEL: one32:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+
+; FIXME: Figure out the best approach in 64-bit mode.
+; CHECK64-LABEL: one32:
+; CHECK64: movl $1, %eax
+; CHECK64-NEXT: retq
+}
+
+define i32 @one32_minsize() minsize {
+entry:
+  ret i32 1
+
+; On 32-bit, xor-inc is preferred over push-pop.
+; CHECK32-LABEL: one32_minsize:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+
+; On 64-bit we don't do xor-inc yet, so push-pop it is. Note that we have to
+; pop into a 64-bit register even when we just need 32 bits.
+; CHECK64-LABEL: one32_minsize:
+; CHECK64: pushq $1
+; CHECK64: .cfi_adjust_cfa_offset 8
+; CHECK64: popq %rax
+; CHECK64: .cfi_adjust_cfa_offset -8
+; CHECK64-NEXT: retq
+
+; On Win64 we can't adjust the stack unless there's a frame pointer.
+; CHECKWIN64-LABEL: one32_minsize:
+; CHECKWIN64: movl $1, %eax
+; CHECKWIN64-NEXT: retq
+}
+
+define i32 @pr26023() minsize {
+entry:
+  %x = alloca [120 x i8]
+  %0 = getelementptr inbounds [120 x i8], [120 x i8]* %x, i64 0, i64 0
+  call void asm sideeffect "", "imr,~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %0)
+  %arrayidx = getelementptr inbounds [120 x i8], [120 x i8]* %x, i64 0, i64 119
+  store volatile i8 -2, i8* %arrayidx
+  call void asm sideeffect "", "r,~{dirflag},~{fpsr},~{flags}"(i32 5)
+  %1 = load volatile i8, i8* %arrayidx
+  %conv = sext i8 %1 to i32
+  ret i32 %conv
+
+; The function writes to the redzone, so push/pop cannot be used.
+; CHECK64-LABEL: pr26023:
+; CHECK64: movl $5, %ecx
+; CHECK64: retq
+
+; 32-bit X86 doesn't have a redzone.
+; CHECK32-LABEL: pr26023:
+; CHECK32: pushl $5
+; CHECK32: popl %ecx
+; CHECK32: retl
+}
+
+
+define i64 @one64_minsize() minsize {
+entry:
+  ret i64 1
+; On 64-bit we don't do xor-inc yet, so push-pop it is.
+; CHECK64-LABEL: one64_minsize:
+; CHECK64: pushq $1
+; CHECK64: .cfi_adjust_cfa_offset 8
+; CHECK64: popq %rax
+; CHECK64: .cfi_adjust_cfa_offset -8
+; CHECK64-NEXT: retq
+
+; On Win64 we can't adjust the stack unless there's a frame pointer.
+; CHECKWIN64-LABEL: one64_minsize:
+; CHECKWIN64: movl $1, %eax
+; CHECKWIN64-NEXT: retq
+}
+
+define i32 @minus_one32() optsize {
+entry:
+  ret i32 -1
+
+; CHECK32-LABEL: minus_one32:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i32 @minus_one32_minsize() minsize {
+entry:
+  ret i32 -1
+
+; xor-dec is preferred over push-pop.
+; CHECK32-LABEL: minus_one32_minsize:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i16 @one16() optsize {
+entry:
+  ret i16 1
+
+; CHECK32-LABEL: one16:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+}
+
+define i16 @minus_one16() optsize {
+entry:
+  ret i16 -1
+
+; CHECK32-LABEL: minus_one16:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i32 @minus_five32() minsize {
+entry:
+  ret i32 -5
+
+; CHECK32-LABEL: minus_five32:
+; CHECK32: pushl $-5
+; CHECK32: popl %eax
+; CHECK32: retl
+}
+
+define i64 @minus_five64() minsize {
+entry:
+  ret i64 -5
+
+; CHECK64-LABEL: minus_five64:
+; CHECK64: pushq $-5
+; CHECK64: .cfi_adjust_cfa_offset 8
+; CHECK64: popq %rax
+; CHECK64: .cfi_adjust_cfa_offset -8
+; CHECK64: retq
+}
+
+define i32 @rematerialize_minus_one() optsize {
+entry:
+  ; Materialize -1 (thiscall forces it into %ecx).
+  tail call x86_thiscallcc void @f(i32 -1)
+
+  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+  ; spilling it to the stack.
+  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+  ; -1 should be re-materialized here instead of getting spilled above.
+  ret i32 -1
+
+; CHECK32-LABEL: rematerialize_minus_one
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NOT: %eax
+; CHECK32: retl
+}
+
+define i32 @rematerialize_minus_one_eflags(i32 %x) optsize {
+entry:
+  ; Materialize -1 (thiscall forces it into %ecx).
+  tail call x86_thiscallcc void @f(i32 -1)
+
+  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+  ; spilling it to the stack.
+  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+  ; Define eflags.
+  %a = icmp ne i32 %x, 123
+  %b = zext i1 %a to i32
+  ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
+  ; It must therefore not use the xor-dec lowering.
+  %c = select i1 %a, i32 %b, i32 -1
+  ret i32 %c
+
+; CHECK32-LABEL: rematerialize_minus_one_eflags
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: cmpl
+; CHECK32: setne
+; CHECK32-NOT: xorl
+; CHECK32: movl $-1
+; CHECK32: cmov
+; CHECK32: retl
+}
+
+declare x86_thiscallcc void @f(i32)
@@ -29,8 +29,9 @@ define double @pow_wrapper_optsize(double %a) optsize {
 define double @pow_wrapper_minsize(double %a) minsize {
 ; CHECK-LABEL: pow_wrapper_minsize:
 ; CHECK: # BB#0:
-; CHECK-NEXT: movl $15, %edi
-; CHECK-NEXT: jmp
+; CHECK-NEXT: pushq $15
+; CHECK: popq %rdi
+; CHECK: jmp
   %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
   ret double %ret
 }