1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[CostModel][X86] Handle costs for insert/extractelement with non-immediate indices via stack

Determine the insert/extractelement costs when the operation is performed as a sequence of aliased loads+stores via the stack.
This commit is contained in:
Simon Pilgrim 2021-07-05 13:19:47 +01:00
parent 4d75d8ef44
commit 8ed3c99d18
6 changed files with 844 additions and 715 deletions

View File

@ -3263,6 +3263,36 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
Type *ScalarType = Val->getScalarType();
int RegisterFileMoveCost = 0;
// Non-immediate extraction/insertion can be handled as a sequence of
// aliased loads+stores via the stack.
if (Index == -1U && (Opcode == Instruction::ExtractElement ||
Opcode == Instruction::InsertElement)) {
// TODO: On some SSE41+ targets, we expand to cmp+splat+select patterns:
// inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.
// TODO: Move this to BasicTTIImpl.h? We'd need better gep + index handling.
assert(isa<FixedVectorType>(Val) && "Fixed vector type expected");
Align VecAlign = DL.getPrefTypeAlign(Val);
Align SclAlign = DL.getPrefTypeAlign(ScalarType);
// Extract - store vector to stack, load scalar.
if (Opcode == Instruction::ExtractElement) {
return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
TTI::TargetCostKind::TCK_RecipThroughput) +
getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
TTI::TargetCostKind::TCK_RecipThroughput);
}
// Insert - store vector to stack, store scalar, load vector.
if (Opcode == Instruction::InsertElement) {
return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
TTI::TargetCostKind::TCK_RecipThroughput) +
getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
TTI::TargetCostKind::TCK_RecipThroughput) +
getMemoryOpCost(Instruction::Load, Val, VecAlign, 0,
TTI::TargetCostKind::TCK_RecipThroughput);
}
}
if (Index != -1U && (Opcode == Instruction::ExtractElement ||
Opcode == Instruction::InsertElement)) {
// Legalize the type.

View File

@ -16,7 +16,7 @@ define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
;CHECK: cost of 1 {{.*}} extract
%E = extractelement <8 x float> undef, i32 1
;CHECK: cost of 1 {{.*}} extract
;CHECK: cost of 3 {{.*}} extract
%F = extractelement <8 x float> undef, i32 %arg
;CHECK: cost of 0 {{.*}} insert

View File

@ -16,7 +16,7 @@ define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
;CHECK: cost of 1 {{.*}} extract
%E = extractelement <8 x float> undef, i32 1
;CHECK: cost of 1 {{.*}} extract
;CHECK: cost of 3 {{.*}} extract
%F = extractelement <8 x float> undef, i32 %arg
;CHECK: cost of 0 {{.*}} insert

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff