1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[X86] X86TTIImpl::getInterleavedMemoryOpCostAVX2(): use getMemoryOpCost()

Now that getMemoryOpCost() correctly handles all the vector variants,
we no longer need to hand-roll our own version of it and should use it directly.

The AVX512 variant probably needs a similar change,
but there it is less obvious.
This commit is contained in:
Roman Lebedev 2021-05-11 16:09:10 +03:00
parent e1a045d607
commit a7f61f4671
3 changed files with 11 additions and 19 deletions

View File

@ -4687,17 +4687,9 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
unsigned VF = VecTy->getNumElements() / Factor;
Type *ScalarTy = VecTy->getElementType();
// Calculate the number of memory operations (NumOfMemOps), required
// for load/store the VecTy.
unsigned VecTySize = DL.getTypeStoreSize(VecTy);
unsigned LegalVTSize = LegalVT.getStoreSize();
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Get the cost of one memory operation.
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
InstructionCost MemOpCost = getMemoryOpCost(
Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
// Get the cost of all the memory operations.
InstructionCost MemOpCosts = getMemoryOpCost(
Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
auto *VT = FixedVectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT);
@ -4753,13 +4745,13 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
if (Opcode == Instruction::Load) {
if (const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT()))
return NumOfMemOps * MemOpCost + Entry->Cost;
return MemOpCosts + Entry->Cost;
} else {
assert(Opcode == Instruction::Store &&
"Expected Store Instruction at this point");
if (const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT()))
return NumOfMemOps * MemOpCost + Entry->Cost;
return MemOpCosts + Entry->Cost;
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,

View File

@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readonly uwtable
define i32 @doit_stride3(i8* nocapture readonly %Ptr, i32 %Nels) {
;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 11 for VF 2 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 10 for VF 8 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 7 for VF 4 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: %0 = load i8
;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: %0 = load i8
entry:

View File

@ -7,9 +7,9 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind uwtable
define void @doit_stride3(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr {
;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 9 for VF 4 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 14 for VF 8 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %conv4
;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %conv4
entry: