1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[ARM] Add VREV MVE shuffle costs

This uses the shuffle mask cost from D98206 to give a better cost of MVE
VREV instructions. This helps especially in VectorCombine where the cost
of shuffles is used to reorder bitcasts, which this helps keep the phase
ordering test for fp16 reductions producing optimal code. The isVREVMask
has been moved to a header file to allow it to be used across target
transform and isel lowering.

Differential Revision: https://reviews.llvm.org/D98210
This commit is contained in:
David Green 2021-03-17 21:21:43 +00:00
parent 57267d06c3
commit 6698320324
4 changed files with 57 additions and 47 deletions

View File

@ -21,6 +21,7 @@
#include "ARMRegisterInfo.h"
#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetTransformInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
@ -6966,35 +6967,6 @@ static bool isVEXTMask(ArrayRef<int> M, EVT VT,
return true;
}
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
return true;
}
static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
// We can handle <8 x i8> vector shuffles. If the index in the mask is out of
// range, then 0 is placed into the resulting vector. So pretty much any mask

View File

@ -1219,7 +1219,16 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
return LT.first * Entry->Cost *
ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput);
}
if (!Mask.empty()) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (Mask.size() <= LT.second.getVectorNumElements() &&
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
isVREVMask(Mask, LT.second, 64)))
return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first;
}
}
int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
? ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput)
: 1;

View File

@ -282,6 +282,35 @@ public:
/// @}
};
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz != 8 && EltSz != 16 && EltSz != 32)
return false;
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0, e = M.size(); i < e; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}
return true;
}
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

View File

@ -101,11 +101,11 @@ define void @reverse() {
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
@ -246,11 +246,11 @@ define void @select() {
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
@ -316,19 +316,19 @@ define void @select() {
define void @vrev2() {
; CHECK-MVE-LABEL: 'vrev2'
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@ -378,12 +378,12 @@ define void @vrev2() {
define void @vrev4() {
; CHECK-MVE-LABEL: 'vrev4'
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void