mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
Re-implement r122936 with proper target hooks. Now getMaxStoresPerMemcpy
etc. take an OptSize option. If OptSize is true, they return the inline limit for functions with the OptSize attribute. llvm-svn: 122952
This commit is contained in:
parent
83067bc3e7
commit
cb39cc2164
@ -642,21 +642,30 @@ public:
|
||||
|
||||
/// This function returns the maximum number of store operations permitted
|
||||
/// to replace a call to llvm.memset. The value is set by the target at the
|
||||
/// performance threshold for such a replacement.
|
||||
/// performance threshold for such a replacement. If OptSize is true,
|
||||
/// return the limit for functions that have OptSize attribute.
|
||||
/// @brief Get maximum # of store operations permitted for llvm.memset
|
||||
unsigned getMaxStoresPerMemset() const { return maxStoresPerMemset; }
|
||||
unsigned getMaxStoresPerMemset(bool OptSize) const {
|
||||
return OptSize ? maxStoresPerMemsetOptSize : maxStoresPerMemset;
|
||||
}
|
||||
|
||||
/// This function returns the maximum number of store operations permitted
|
||||
/// to replace a call to llvm.memcpy. The value is set by the target at the
|
||||
/// performance threshold for such a replacement.
|
||||
/// performance threshold for such a replacement. If OptSize is true,
|
||||
/// return the limit for functions that have OptSize attribute.
|
||||
/// @brief Get maximum # of store operations permitted for llvm.memcpy
|
||||
unsigned getMaxStoresPerMemcpy() const { return maxStoresPerMemcpy; }
|
||||
unsigned getMaxStoresPerMemcpy(bool OptSize) const {
|
||||
return OptSize ? maxStoresPerMemcpyOptSize : maxStoresPerMemcpy;
|
||||
}
|
||||
|
||||
/// This function returns the maximum number of store operations permitted
|
||||
/// to replace a call to llvm.memmove. The value is set by the target at the
|
||||
/// performance threshold for such a replacement.
|
||||
/// performance threshold for such a replacement. If OptSize is true,
|
||||
/// return the limit for functions that have OptSize attribute.
|
||||
/// @brief Get maximum # of store operations permitted for llvm.memmove
|
||||
unsigned getMaxStoresPerMemmove() const { return maxStoresPerMemmove; }
|
||||
unsigned getMaxStoresPerMemmove(bool OptSize) const {
|
||||
return OptSize ? maxStoresPerMemmoveOptSize : maxStoresPerMemmove;
|
||||
}
|
||||
|
||||
/// This function returns true if the target allows unaligned memory accesses
|
||||
/// of the specified type. This is used, for example, in situations where an
|
||||
@ -1776,6 +1785,10 @@ protected:
|
||||
/// @brief Specify maximum number of store instructions per memset call.
|
||||
unsigned maxStoresPerMemset;
|
||||
|
||||
/// Maximum number of store operations that may be substituted for the call
|
||||
/// to memset, used for functions with OptSize attribute.
|
||||
unsigned maxStoresPerMemsetOptSize;
|
||||
|
||||
/// When lowering \@llvm.memcpy this field specifies the maximum number of
|
||||
/// store operations that may be substituted for a call to memcpy. Targets
|
||||
/// must set this value based on the cost threshold for that target. Targets
|
||||
@ -1788,6 +1801,10 @@ protected:
|
||||
/// @brief Specify maximum number of store instructions per memcpy call.
|
||||
unsigned maxStoresPerMemcpy;
|
||||
|
||||
/// Maximum number of store operations that may be substituted for a call
|
||||
/// to memcpy, used for functions with OptSize attribute.
|
||||
unsigned maxStoresPerMemcpyOptSize;
|
||||
|
||||
/// When lowering \@llvm.memmove this field specifies the maximum number of
|
||||
/// store instructions that may be substituted for a call to memmove. Targets
|
||||
/// must set this value based on the cost threshold for that target. Targets
|
||||
@ -1799,6 +1816,10 @@ protected:
|
||||
/// @brief Specify maximum number of store instructions per memmove call.
|
||||
unsigned maxStoresPerMemmove;
|
||||
|
||||
/// Maximum number of store instructions that may be substituted for a call
|
||||
/// to memmove, used for functions with OptSize attribute.
|
||||
unsigned maxStoresPerMemmoveOptSize;
|
||||
|
||||
/// This field specifies whether the target can benefit from code placement
|
||||
/// optimization.
|
||||
bool benefitFromCodePlacementOpt;
|
||||
|
@ -3281,15 +3281,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||
VT = LVT;
|
||||
}
|
||||
|
||||
// If we're optimizing for size, and there is a limit, bump the maximum number
|
||||
// of operations inserted down to 4. This is a wild guess that approximates
|
||||
// the size of a call to memcpy or memset (3 arguments + call).
|
||||
if (Limit != ~0U) {
|
||||
const Function *F = DAG.getMachineFunction().getFunction();
|
||||
if (F->hasFnAttr(Attribute::OptimizeForSize))
|
||||
Limit = 4;
|
||||
}
|
||||
|
||||
unsigned NumMemOps = 0;
|
||||
while (Size != 0) {
|
||||
unsigned VTSize = VT.getSizeInBits() / 8;
|
||||
@ -3335,7 +3326,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
std::vector<EVT> MemOps;
|
||||
bool DstAlignCanChange = false;
|
||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
|
||||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
|
||||
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
|
||||
DstAlignCanChange = true;
|
||||
@ -3345,7 +3338,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
std::string Str;
|
||||
bool CopyFromStr = isMemSrcFromString(Src, Str);
|
||||
bool isZeroStr = CopyFromStr && Str.empty();
|
||||
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
|
||||
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
|
||||
|
||||
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
(DstAlignCanChange ? 0 : Align),
|
||||
@ -3426,14 +3419,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
std::vector<EVT> MemOps;
|
||||
bool DstAlignCanChange = false;
|
||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
|
||||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
|
||||
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
|
||||
DstAlignCanChange = true;
|
||||
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
|
||||
if (Align > SrcAlign)
|
||||
SrcAlign = Align;
|
||||
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
|
||||
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
|
||||
|
||||
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
(DstAlignCanChange ? 0 : Align),
|
||||
@ -3502,13 +3497,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
std::vector<EVT> MemOps;
|
||||
bool DstAlignCanChange = false;
|
||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
|
||||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
|
||||
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
|
||||
DstAlignCanChange = true;
|
||||
bool NonScalarIntSafe =
|
||||
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
|
||||
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
|
||||
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
|
||||
Size, (DstAlignCanChange ? 0 : Align), 0,
|
||||
NonScalarIntSafe, false, DAG, TLI))
|
||||
return SDValue();
|
||||
|
@ -567,6 +567,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
|
||||
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
|
||||
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
|
||||
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
|
||||
maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
|
||||
= maxStoresPerMemmoveOptSize = 4;
|
||||
benefitFromCodePlacementOpt = false;
|
||||
UseUnderscoreSetJmp = false;
|
||||
UseUnderscoreLongJmp = false;
|
||||
|
@ -687,7 +687,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
else
|
||||
setSchedulingPreference(Sched::Hybrid);
|
||||
|
||||
maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
|
||||
//// temporary - rewrite interface to use type
|
||||
maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
|
||||
|
||||
// On ARM arguments smaller than 4 bytes are extended, so all arguments
|
||||
// are at least 4 bytes aligned.
|
||||
|
@ -978,11 +978,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
// FIXME: These should be based on subtarget info. Plus, the values should
|
||||
// be smaller when we are in optimizing for size mode.
|
||||
// On Darwin, -Os means optimize for size without hurting performance,
|
||||
// do not reduce the limit.
|
||||
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
|
||||
maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
|
||||
maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
|
||||
maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
|
||||
maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
|
||||
maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
|
||||
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
|
||||
setPrefLoopAlignment(16);
|
||||
benefitFromCodePlacementOpt = true;
|
||||
}
|
||||
|
@ -149,8 +149,9 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
|
||||
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
|
||||
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
|
||||
|
||||
maxStoresPerMemset = 4;
|
||||
maxStoresPerMemmove = maxStoresPerMemcpy = 2;
|
||||
maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
|
||||
maxStoresPerMemmove = maxStoresPerMemmoveOptSize
|
||||
= maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
|
||||
|
||||
// We have target-specific dag combine patterns for the following nodes:
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -O1 -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
|
||||
; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
|
||||
; <rdar://problem/8124405>
|
||||
|
||||
%struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
|
||||
@ -21,9 +21,9 @@ bb:
|
||||
; statement. It can be an ADD or LEA instruction, it's not important which one
|
||||
; it is.
|
||||
;
|
||||
; CHECK: ## %bb
|
||||
; CHECK-NEXT: addq $64036, %rdi
|
||||
; CHECK: rep;stosl
|
||||
; CHECK: # %bb
|
||||
; CHECK: addq $64036, %rdi
|
||||
; CHECK: rep;stosl
|
||||
|
||||
%tmp5 = bitcast i32* %tmp4 to i8*
|
||||
call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false)
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
||||
|
||||
@ -9,8 +10,8 @@ entry:
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64( i8* %a, i8* %b, i64 %n, i32 1, i1 0 )
|
||||
ret i8* %a
|
||||
|
||||
; CHECK: test1:
|
||||
; CHECK: memcpy
|
||||
; LINUX: test1:
|
||||
; LINUX: memcpy
|
||||
}
|
||||
|
||||
; Variable memcpy's should lower to calls.
|
||||
@ -21,18 +22,41 @@ entry:
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp25, i64 %n, i32 8, i1 0 )
|
||||
ret i8* %tmp14
|
||||
|
||||
; CHECK: test2:
|
||||
; CHECK: memcpy
|
||||
; LINUX: test2:
|
||||
; LINUX: memcpy
|
||||
}
|
||||
|
||||
; Large constant memcpy's should lower to a call when optimizing for size.
|
||||
; PR6623
|
||||
|
||||
; On the other hand, Darwin's definition of -Os is optimizing for size without
|
||||
; hurting performance so it should just ignore optsize when expanding memcpy.
|
||||
; rdar://8821501
|
||||
define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzone {
|
||||
entry:
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
|
||||
ret void
|
||||
; CHECK: test3:
|
||||
; CHECK: memcpy
|
||||
; LINUX: test3:
|
||||
; LINUX: memcpy
|
||||
|
||||
; DARWIN: test3:
|
||||
; DARWIN-NOT: memcpy
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
; DARWIN: movq
|
||||
}
|
||||
|
||||
; Large constant memcpy's should be inlined when not optimizing for size.
|
||||
@ -40,18 +64,18 @@ define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone {
|
||||
entry:
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
|
||||
ret void
|
||||
; CHECK: test4:
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; CHECK: movq
|
||||
; LINUX: test4:
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
; LINUX: movq
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user