mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[CostModel][X86] Handle costs for insert/extractelement with non-immediate indices via stack
Determine the insertelement/extractelement costs for non-immediate indices when the operation is performed as a sequence of aliased loads and stores via the stack.
This commit is contained in:
parent
4d75d8ef44
commit
8ed3c99d18
@ -3263,6 +3263,36 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
|
||||
Type *ScalarType = Val->getScalarType();
|
||||
int RegisterFileMoveCost = 0;
|
||||
|
||||
// Non-immediate extraction/insertion can be handled as a sequence of
|
||||
// aliased loads+stores via the stack.
|
||||
if (Index == -1U && (Opcode == Instruction::ExtractElement ||
|
||||
Opcode == Instruction::InsertElement)) {
|
||||
// TODO: On some SSE41+ targets, we expand to cmp+splat+select patterns:
|
||||
// inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.
|
||||
|
||||
// TODO: Move this to BasicTTIImpl.h? We'd need better gep + index handling.
|
||||
assert(isa<FixedVectorType>(Val) && "Fixed vector type expected");
|
||||
Align VecAlign = DL.getPrefTypeAlign(Val);
|
||||
Align SclAlign = DL.getPrefTypeAlign(ScalarType);
|
||||
|
||||
// Extract - store vector to stack, load scalar.
|
||||
if (Opcode == Instruction::ExtractElement) {
|
||||
return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
|
||||
TTI::TargetCostKind::TCK_RecipThroughput) +
|
||||
getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
|
||||
TTI::TargetCostKind::TCK_RecipThroughput);
|
||||
}
|
||||
// Insert - store vector to stack, store scalar, load vector.
|
||||
if (Opcode == Instruction::InsertElement) {
|
||||
return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
|
||||
TTI::TargetCostKind::TCK_RecipThroughput) +
|
||||
getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
|
||||
TTI::TargetCostKind::TCK_RecipThroughput) +
|
||||
getMemoryOpCost(Instruction::Load, Val, VecAlign, 0,
|
||||
TTI::TargetCostKind::TCK_RecipThroughput);
|
||||
}
|
||||
}
|
||||
|
||||
if (Index != -1U && (Opcode == Instruction::ExtractElement ||
|
||||
Opcode == Instruction::InsertElement)) {
|
||||
// Legalize the type.
|
||||
|
@ -16,7 +16,7 @@ define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
|
||||
;CHECK: cost of 1 {{.*}} extract
|
||||
%E = extractelement <8 x float> undef, i32 1
|
||||
|
||||
-;CHECK: cost of 1 {{.*}} extract
|
||||
+;CHECK: cost of 3 {{.*}} extract
|
||||
%F = extractelement <8 x float> undef, i32 %arg
|
||||
|
||||
;CHECK: cost of 0 {{.*}} insert
|
||||
|
@ -16,7 +16,7 @@ define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
|
||||
;CHECK: cost of 1 {{.*}} extract
|
||||
%E = extractelement <8 x float> undef, i32 1
|
||||
|
||||
-;CHECK: cost of 1 {{.*}} extract
|
||||
+;CHECK: cost of 3 {{.*}} extract
|
||||
%F = extractelement <8 x float> undef, i32 %arg
|
||||
|
||||
;CHECK: cost of 0 {{.*}} insert
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user