mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 12:43:36 +01:00
Implement support for x86 fast-isel of small fixed-size memcpys, which are generated
en masse for C++ PODs. On my C++ test file, this cuts the fast-isel rejects by 10x and shrinks the generated .s file by 5%. llvm-svn: 129755
This commit is contained in:
parent
ec5a480dca
commit
f15db6c86f
@ -486,8 +486,7 @@ bool FastISel::SelectCall(const User *I) {
|
|||||||
if (!F) return false;
|
if (!F) return false;
|
||||||
|
|
||||||
// Handle selected intrinsic function calls.
|
// Handle selected intrinsic function calls.
|
||||||
unsigned IID = F->getIntrinsicID();
|
switch (F->getIntrinsicID()) {
|
||||||
switch (IID) {
|
|
||||||
default: break;
|
default: break;
|
||||||
case Intrinsic::dbg_declare: {
|
case Intrinsic::dbg_declare: {
|
||||||
const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
|
const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
|
||||||
@ -552,9 +551,9 @@ bool FastISel::SelectCall(const User *I) {
|
|||||||
}
|
}
|
||||||
case Intrinsic::eh_exception: {
|
case Intrinsic::eh_exception: {
|
||||||
EVT VT = TLI.getValueType(I->getType());
|
EVT VT = TLI.getValueType(I->getType());
|
||||||
switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
|
if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
|
||||||
default: break;
|
break;
|
||||||
case TargetLowering::Expand: {
|
|
||||||
assert(FuncInfo.MBB->isLandingPad() &&
|
assert(FuncInfo.MBB->isLandingPad() &&
|
||||||
"Call to eh.exception not in landing pad!");
|
"Call to eh.exception not in landing pad!");
|
||||||
unsigned Reg = TLI.getExceptionAddressRegister();
|
unsigned Reg = TLI.getExceptionAddressRegister();
|
||||||
@ -565,14 +564,10 @@ bool FastISel::SelectCall(const User *I) {
|
|||||||
UpdateValueMap(I, ResultReg);
|
UpdateValueMap(I, ResultReg);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Intrinsic::eh_selector: {
|
case Intrinsic::eh_selector: {
|
||||||
EVT VT = TLI.getValueType(I->getType());
|
EVT VT = TLI.getValueType(I->getType());
|
||||||
switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
|
if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
|
||||||
default: break;
|
break;
|
||||||
case TargetLowering::Expand: {
|
|
||||||
if (FuncInfo.MBB->isLandingPad())
|
if (FuncInfo.MBB->isLandingPad())
|
||||||
AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
|
AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
|
||||||
else {
|
else {
|
||||||
@ -609,9 +604,6 @@ bool FastISel::SelectCall(const User *I) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// An arbitrary call. Bail.
|
// An arbitrary call. Bail.
|
||||||
return false;
|
return false;
|
||||||
|
@ -1325,6 +1325,52 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
|
|||||||
// FIXME: Handle more intrinsics.
|
// FIXME: Handle more intrinsics.
|
||||||
switch (I.getIntrinsicID()) {
|
switch (I.getIntrinsicID()) {
|
||||||
default: return false;
|
default: return false;
|
||||||
|
case Intrinsic::memcpy: {
|
||||||
|
const MemCpyInst &MCI = cast<MemCpyInst>(I);
|
||||||
|
// Don't handle volatile or variable length memcpys.
|
||||||
|
if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Don't inline super long memcpys. We could lower these to a memcpy call,
|
||||||
|
// but we might as well bail out.
|
||||||
|
uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
|
||||||
|
bool i64Legal = TLI.isTypeLegal(MVT::i64);
|
||||||
|
if (Len > (i64Legal ? 32 : 16)) return false;
|
||||||
|
|
||||||
|
// Get the address of the dest and source addresses.
|
||||||
|
X86AddressMode DestAM, SrcAM;
|
||||||
|
if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
|
||||||
|
!X86SelectAddress(MCI.getRawSource(), SrcAM))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// We don't care about alignment here since we just emit integer accesses.
|
||||||
|
while (Len) {
|
||||||
|
MVT VT;
|
||||||
|
if (Len >= 8 && i64Legal)
|
||||||
|
VT = MVT::i64;
|
||||||
|
else if (Len >= 4)
|
||||||
|
VT = MVT::i32;
|
||||||
|
else if (Len >= 2)
|
||||||
|
VT = MVT::i16;
|
||||||
|
else {
|
||||||
|
assert(Len == 1);
|
||||||
|
VT = MVT::i8;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Reg;
|
||||||
|
bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
|
||||||
|
RV &= X86FastEmitStore(VT, Reg, DestAM);
|
||||||
|
assert(RV && "Failed to emit load or store??");
|
||||||
|
|
||||||
|
unsigned Size = VT.getSizeInBits()/8;
|
||||||
|
Len -= Size;
|
||||||
|
DestAM.Disp += Size;
|
||||||
|
SrcAM.Disp += Size;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
case Intrinsic::stackprotector: {
|
case Intrinsic::stackprotector: {
|
||||||
// Emit code inline code to store the stack guard onto the stack.
|
// Emit code inline code to store the stack guard onto the stack.
|
||||||
EVT PtrTy = TLI.getPointerTy();
|
EVT PtrTy = TLI.getPointerTy();
|
||||||
@ -1335,17 +1381,14 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
|
|||||||
// Grab the frame index.
|
// Grab the frame index.
|
||||||
X86AddressMode AM;
|
X86AddressMode AM;
|
||||||
if (!X86SelectAddress(Slot, AM)) return false;
|
if (!X86SelectAddress(Slot, AM)) return false;
|
||||||
|
|
||||||
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
|
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
case Intrinsic::objectsize: {
|
case Intrinsic::objectsize: {
|
||||||
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
|
// FIXME: This should be moved to generic code!
|
||||||
|
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
|
||||||
const Type *Ty = I.getCalledFunction()->getReturnType();
|
const Type *Ty = I.getCalledFunction()->getReturnType();
|
||||||
|
|
||||||
assert(CI && "Non-constant type in Intrinsic::objectsize?");
|
|
||||||
|
|
||||||
MVT VT;
|
MVT VT;
|
||||||
if (!isTypeLegal(Ty, VT))
|
if (!isTypeLegal(Ty, VT))
|
||||||
return false;
|
return false;
|
||||||
@ -1383,6 +1426,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
|
|||||||
}
|
}
|
||||||
case Intrinsic::sadd_with_overflow:
|
case Intrinsic::sadd_with_overflow:
|
||||||
case Intrinsic::uadd_with_overflow: {
|
case Intrinsic::uadd_with_overflow: {
|
||||||
|
// FIXME: Should fold immediates.
|
||||||
|
|
||||||
// Replace "add with overflow" intrinsics with an "add" instruction followed
|
// Replace "add with overflow" intrinsics with an "add" instruction followed
|
||||||
// by a seto/setc instruction. Later on, when the "extractvalue"
|
// by a seto/setc instruction. Later on, when the "extractvalue"
|
||||||
// instructions are encountered, we use the fact that two registers were
|
// instructions are encountered, we use the fact that two registers were
|
||||||
|
@ -170,3 +170,14 @@ entry:
|
|||||||
; CHECK: callq
|
; CHECK: callq
|
||||||
}
|
}
|
||||||
|
|
||||||
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
|
||||||
|
|
||||||
|
; rdar://9289488 - fast-isel shouldn't bail out on llvm.memcpy
|
||||||
|
define void @test15(i8* %a, i8* %b) nounwind {
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 4, i32 4, i1 false)
|
||||||
|
ret void
|
||||||
|
; CHECK: test15:
|
||||||
|
; CHECK-NEXT: movl (%rsi), %eax
|
||||||
|
; CHECK-NEXT: movl %eax, (%rdi)
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user